1 Set the working environment

1.1 Reset workspace and load libraries

This analysis uses ABCD Release 4.0.

This script is adapted from version 2.0 of the socioeconomic status and developmental analysis with elastic net.

# Remove every (non-hidden) object from the current environment so the
# analysis starts from an empty workspace.
# NOTE(review): this does not detach packages or reset options, so it is not a
# full session reset.
rm(list=ls())
# Run garbage collection after clearing the workspace.
gc()
library(tidyverse)
library(qgraph)
library(pander)
library(summarytools)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(tidymodels)
library(knitr)
library(extrafont)
## for poisson class of elastic net
library(poissonreg)
library("sva")
### plotting libraries
library(ggtext)
library(ggpubr)
library(cowplot)
library(ggthemes)
### package for pls analysis (all packages are necessary for the model to run)
library("pls")
library("mixOmics")
library(plsmod)

1.2 Setting up paths

Using ABCD 4.0

set up parallel

# parallel for ubuntu
#doParallel::registerDoParallel(cores=30)  

### default ggplot theme: black-and-white without panel grid lines
theme_set(theme_bw() + theme(panel.grid = element_blank()))

### parallel backend for mac
## Leave five physical cores free for the rest of the system.
all_cores <- parallel::detectCores(logical = FALSE) - 5

## detectCores() may return NA, and on machines with five or fewer physical
## cores the subtraction gives a non-positive value; always register at least
## one worker so the backend is usable everywhere.
if (is.na(all_cores) || all_cores < 1) {
  all_cores <- 1
}

doParallel::registerDoParallel(cores = all_cores)

## this one works for ubuntu but slow
#library(doFuture)
#registerDoFuture()
#plan(multicore(workers = 30))

### parallel for windows

#library(doFuture)
#registerDoFuture()
#plan(multisession(workers = 30))

2 Load up data files

2.1 Family relationship

# Read the ACSPSW03 table; family IDs and propensity scores are taken from it below.
ACS <- read_csv(file = paste0(dataFold, "ACSPSW03_DATA_TABLE.csv"))
## Rows: 23101 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): SUBJECTKEY, SRC_SUBJECT_ID, INTERVIEW_DATE, SEX, EVENTNAME, GENETI...
## dbl (18): ACSPSW03_ID, DATASET_ID, INTERVIEW_AGE, RACE_ETHNICITY, REL_FAMILY...
## lgl  (6): GENETIC_PAIRED_SUBJECTID_4, GENETIC_PI_HAT_3, GENETIC_PI_HAT_4, GE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#knitr::kable(glimpse(ACS))

# guardian-report relationship
# Relationship of the participant in his or her family
# 0 = single; 1 = sibling; 2 = twin; 3 = triplet
# ACS %>% count(REL_RELATIONSHIP)

# Keep the participant/event identifiers, interview age, family ID and raked
# propensity score; the family ID is stored as a factor.
ACSselected <- ACS %>%
  mutate(REL_FAMILY_ID = as.factor(REL_FAMILY_ID)) %>%
  dplyr::select(
    SUBJECTKEY, EVENTNAME, INTERVIEW_AGE,
    REL_FAMILY_ID, ACS_RAKED_PROPENSITY_SCORE
  )

# Summary of the selected ACS columns, baseline rows only.
ACSselected %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 5
_______________________
Column type frequency:
character 2
factor 1
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11876 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
REL_FAMILY_ID 0 1 FALSE 9854 373: 5, 749: 4, 11: 3, 400: 3

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
INTERVIEW_AGE 0 1 118.98 7.50 107.00 112.00 119.00 126.00 133.00 ▇▆▆▆▆
ACS_RAKED_PROPENSITY_SCORE 0 1 691.34 350.98 161.36 449.35 619.31 821.74 1778.92 ▅▇▂▂▁

2.2 site information

### Load site and scanner information (base read.csv, converted to a tibble)
Siteinfo <- read.csv(paste0(dataFold, "ABCD_LT01_DATA_TABLE.csv")) %>%
  tibble::as_tibble()

vision index

# Flag a vision problem (visionProb = 1) when VIS_FLG is 2 or SNELLEN_VA_Y is
# 0 or 1, otherwise 0. Rows where the condition evaluates to NA stay NA.
vision_idx <- read.csv(paste0(dataFold, "ABCD_SVS01_DATA_TABLE.CSV")) %>%
  as_tibble() %>%
  mutate(
    visionProb = ifelse(
      VIS_FLG == 2 | SNELLEN_VA_Y == 1 | SNELLEN_VA_Y == 0,
      1,
      0
    )
  )

#vision_idx %>% select(SNELLEN_VA_Y, VIS_FLG, visionProb) %>%  arrange(SNELLEN_VA_Y)

Manually correct the erroneous site IDs listed in “Release Notes: Adolescent Brain Cognitive Development Study℠ (ABCD Study®) Data Release 4.0 Changes and Known Issues”.

Only the baseline and two-year follow-up events, which are the ones used in this analysis, are corrected.

# Apply the manual site corrections on a copy, then replace Siteinfo with it.
Siteinfo_fixed <- Siteinfo

# Correction table: the loop below reads its SUBJECTKEY, EVENTNAME and
# SITE_ID_L columns (one row per correction).
# NOTE(review): the path ends in .RData but is read with readRDS(), so the
# file must have been written with saveRDS() - confirm.
site_fix <- readRDS(paste0(scriptfold,'Common_psy_gene_brain_all/saved_outputs/site_fix', '.RData'))

# seq_len(nrow()) iterates zero times for an empty correction table, whereas
# 1:dim(site_fix)[1] would evaluate the body for i = 1 and i = 0.
for (i in seq_len(nrow(site_fix))) {
  fix_site_id <- site_fix$SUBJECTKEY[i]
  fix_site_event <- site_fix$EVENTNAME[i]
  fix_site <- site_fix$SITE_ID_L[i]
  # Overwrite the site for the matching participant/event rows only.
  rows_to_fix <- which(
    Siteinfo_fixed$SUBJECTKEY == fix_site_id &
      Siteinfo_fixed$EVENTNAME == fix_site_event
  )
  Siteinfo_fixed$SITE_ID_L[rows_to_fix] <- fix_site
}

Siteinfo <- Siteinfo_fixed

2.3 Setting up vector of names

# Column names of the task measures in the first batch.
TaskDVs1Batch <- c(
  "NIHTBX_PICVOCAB_UNCORRECTED",
  "NIHTBX_READING_UNCORRECTED",
  "NIHTBX_FLANKER_UNCORRECTED",
  "NIHTBX_PATTERN_UNCORRECTED",
  "NIHTBX_PICTURE_UNCORRECTED",
  "PEA_RAVLT_LD_TRIAL_VII_TC"
)

# Columns that identify a record: participant, event and site.
subj_info <- c("SUBJECTKEY", "EVENTNAME", "SITE_ID_L")

Loading up pre-computed gfactor

# Extract, from every element of gfactor_list, the named g-factor output for
# each combination of split (train/test) and time point (baseline/follow-up).
# NOTE(review): gfactor_list is not created in this part of the script; it
# must already be loaded before this chunk runs.
baseline_train_gfactor <- purrr::map(.x = gfactor_list, .f = "output_train_baseline")
baseline_test_gfactor  <- purrr::map(.x = gfactor_list, .f = "output_test_baseline")
followup_train_gfactor <- purrr::map(.x = gfactor_list, .f = "output_train_followup")
followup_test_gfactor  <- purrr::map(.x = gfactor_list, .f = "output_test_followup")

2.4 Physical

Of the physical-health measures, essentially only the sleep-related ones are useful here.

Not very relevant: ABCD Sum Scores Traumatic Brain Injury (abcd_tbi01); ABCD Longitudinal Summary Scores Traumatic Brain Injury (abcd_lsstbi01); ABCD Sum Scores Parent Sports and Activities Involvement (abcd_spacss01); ABCD Longitudinal Summary Scores Sports Activity (abcd_lsssa01); ABCD Sum Scores Parent Medical History (abcd_medhxss01); ABCD Longitudinal Summary Scores Medical History (abcd_lssmh01); ABCD Sum Scores Developmental History (abcd_devhxss01).

this is mainly about puberty: ABCD Sum Scores Physical Health Youth abcd_ssphy01

Sleep scores: ABCD Parent Sleep Disturbance Scale for Children (abcd_sds01). Diet, only at the one-year follow-up: ABCD Child Nutrition Assessment (abcd_cna01). Summary sleep scores plus diet: ABCD Sum Scores Physical Health Parent (abcd_ssphp01).

#ABCD Parent Sleep Disturbance Scale for Children
#SLEEPDISTURB1_P
#How many hours of sleep does your child get on most nights? ¿Cuántas horas duerme su niño(a) la mayoría de las noches?
#1 = 9-11 hours/ 9 a 11 horas; 2 = 8-9 hours /8 a 9 horas; 3 = 7-8 hours /7 a 8 horas; 4 = 5-7 hours /5 a 7 horas; 5 = Less than 5 hours/ Menos de 5 horas// Consider each question pertaining to the PAST 6 MONTHS of the child's life
#SLEEPDISTURB2_P
#How long after going to bed does your child usually fall asleep? Después de acostarse ¿generalmente cuánto tiempo tarda su niño(a) en quedarse dormido(a)?
#1 = Less than 15 minutes /Menos de 15 minutos; 2 = 15-30 minutes /15 a 30 minutos; 3 = 30-45 minutes /30 a 45 minutos; 4 = 45-60 minutes /45 a 60 minutos; 5 = More than 60 minutes /Más de 60 minutos//Consider each question pertaining to the PAST 6 MONTHS of the child's life
# Read the Parent Sleep Disturbance Scale table and drop duplicated records.
# Rows are compared on every column except the two ID columns
# (ABCD_SDS01_ID, DATASET_ID); .keep_all = TRUE keeps all columns, including
# those two, for the first row of each duplicate set.
sleepDis <-as_tibble(read.csv(paste0(dataFold,"ABCD_SDS01_DATA_TABLE.csv"))) %>%
    distinct(dplyr::select(.,-ABCD_SDS01_ID, -DATASET_ID),.keep_all = TRUE) # some records are duplicated apart from these two ID columns

# sleepDis %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>% 
#   distinct(select(.,-ABCD_SDS01_ID, -DATASET_ID),.keep_all = TRUE) %>% 
#   arrange(SUBJECTKEY)

# Summary of the two single sleep items at baseline.
sleepDis %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(SLEEPDISTURB1_P, SLEEPDISTURB2_P) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SLEEPDISTURB1_P 5 1 1.72 0.81 1 1 2 2 5 ▇▆▂▁▁
SLEEPDISTURB2_P 5 1 1.93 0.98 1 1 2 2 5 ▇▇▂▁▁
# Summary of the two single sleep items at the two-year follow-up.
sleepDis %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(SLEEPDISTURB1_P, SLEEPDISTURB2_P) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 2
_______________________
Column type frequency:
numeric 2
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SLEEPDISTURB1_P 74 0.99 1.99 0.87 1 1 2 2 5 ▆▇▃▁▁
SLEEPDISTURB2_P 74 0.99 2.05 1.05 1 1 2 3 5 ▇▇▃▁▁
# Parent physical-health summary scores (sleep, puberty and diet sums).
PhysicalSum <- read.csv(paste0(dataFold, "ABCD_SSPHP01_DATA_TABLE.csv")) %>%
  as_tibble()

# Summarise every column except the first eight.
PhysicalSum %>%
  dplyr::select(-(1:8)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 39766
Number of columns 36
_______________________
Column type frequency:
numeric 36
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SDS_P_SS_DIMS 193 1.00 11.95 3.79 7 9 11 14 35 ▇▃▁▁▁
SDS_P_SS_DIMS_NM 0 1.00 0.03 0.48 0 0 0 0 7 ▇▁▁▁▁
SDS_P_SS_DIMS_NT 0 1.00 7.00 0.00 7 7 7 7 7 ▁▁▇▁▁
SDS_P_SS_SBD 193 1.00 3.72 1.19 3 3 3 4 15 ▇▁▁▁▁
SDS_P_SS_SBD_NM 0 1.00 0.01 0.21 0 0 0 0 3 ▇▁▁▁▁
SDS_P_SS_SBD_NT 0 1.00 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
SDS_P_SS_DA 192 1.00 3.35 0.80 3 3 3 3 15 ▇▁▁▁▁
SDS_P_SS_DA_NM 0 1.00 0.01 0.21 0 0 0 0 3 ▇▁▁▁▁
SDS_P_SS_DA_NT 0 1.00 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
SDS_P_SS_SWTD 237 0.99 7.94 2.48 6 6 7 9 30 ▇▁▁▁▁
SDS_P_SS_SWTD_NM 0 1.00 0.03 0.42 0 0 0 0 6 ▇▁▁▁▁
SDS_P_SS_SWTD_NT 0 1.00 6.00 0.00 6 6 6 6 6 ▁▁▇▁▁
SDS_P_SS_DOES 193 1.00 7.10 2.54 5 5 6 8 25 ▇▁▁▁▁
SDS_P_SS_DOES_NM 0 1.00 0.02 0.35 0 0 0 0 5 ▇▁▁▁▁
SDS_P_SS_DOES_NT 0 1.00 5.00 0.00 5 5 5 5 5 ▁▁▇▁▁
SDS_P_SS_SHY 193 1.00 2.38 1.06 2 2 2 2 10 ▇▁▁▁▁
SDS_P_SS_SHY_NM 0 1.00 0.01 0.14 0 0 0 0 2 ▇▁▁▁▁
SDS_P_SS_SHY_NT 0 1.00 2.00 0.00 2 2 2 2 2 ▁▁▇▁▁
SDS_P_SS_TOTAL 238 0.99 36.43 8.06 26 31 35 40 126 ▇▁▁▁▁
SDS_P_SS_TOTAL_NM 0 1.00 0.13 1.80 0 0 0 0 26 ▇▁▁▁▁
SDS_P_SS_TOTAL_NT 0 1.00 26.00 0.00 26 26 26 26 26 ▁▁▇▁▁
PDS_P_SS_MALE_CATEGORY 29433 0.26 1.46 0.68 1 1 1 2 5 ▇▃▁▁▁
PDS_P_SS_MALE_CAT_NM 29016 0.27 0.05 0.26 0 0 0 0 3 ▇▁▁▁▁
PDS_P_SS_MALE_CAT_NT 29016 0.27 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
PDS_P_SS_FEMALE_CATEGORY 30131 0.24 2.42 0.95 1 2 3 3 5 ▃▃▇▂▁
PDS_P_SS_FEMALE_CAT_NM 29699 0.25 0.05 0.28 0 0 0 0 3 ▇▁▁▁▁
PDS_P_SS_FEMALE_CAT_NT 29699 0.25 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
CNA_P_SS_SUM 29883 0.25 8.08 2.45 0 6 8 10 14 ▁▂▇▇▂
CNA_P_SS_SUM_NM 17825 0.55 6.98 6.91 0 0 3 14 14 ▇▁▁▁▇
CNA_P_SS_SUM_NT 17825 0.55 14.00 0.00 14 14 14 14 14 ▁▁▇▁▁
PDS_P_SS_FEMALE_CAT_2_NM 20807 0.48 0.08 0.37 0 0 0 0 3 ▇▁▁▁▁
PDS_P_SS_FEMALE_CAT_2_NT 20807 0.48 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
PDS_P_SS_FEMALE_CATEGORY_2 21864 0.45 2.83 1.01 1 2 3 4 5 ▂▂▇▅▁
PDS_P_SS_MALE_CATEGORY_2 19971 0.50 1.76 0.88 1 1 2 2 5 ▇▅▂▁▁
PDS_P_SS_MALE_CAT_2_NM 18935 0.52 0.07 0.36 0 0 0 0 3 ▇▁▁▁▁
PDS_P_SS_MALE_CAT_2_NT 18935 0.52 3.00 0.00 3 3 3 3 3 ▁▁▇▁▁
# sds_p_ss_dims
# Disorders of Initiating and Maintaining Sleep (DIMS) SUM:  sleepdisturb1_p +  sleepdisturb2_p + sleepdisturb3_p + sleepdisturb4_p + sleepdisturb5_p + sleepdisturb10_p + sleepdisturb11_p;  Validation: All items must be answered
# 
# sds_p_ss_sbd
# Sleep Breathing disorders (SBD):  SUM sleepdisturb13_p +  sleepdisturb14_p + sleepdisturb15_p; Validation: All items must be answered
# 
# sds_p_ss_da
# Disorder of Arousal (DA) SUM: sleepdisturb17_p +  sleepdisturb20_p + sleepdisturb21_p;  Validation: All items must be answered
# 
# sds_p_ss_swtd
# Sleep-Wake transition Disorders (SWTD) SUM: sleepdisturb6_p + sleepdisturb7_p + sleepdisturb8_p + sleepdisturb12_p +  sleepdisturb18_p + sleepdisturb19_p; Validation: All items must be answered
# 
# sds_p_ss_does
# Disorders of Excessive Somnolence (DOES) SUM:  sleepdisturb22_p + sleepdisturb23_p +  sleepdisturb24_p +  sleepdisturb25_p + sleepdisturb26_p; Validation: All items must be answered
# 
# sds_p_ss_shy
# Sleep Hyperhydrosis (SHY) SUM: sleepdisturb9_p + sleepdisturb16_p; Validation: All items must be answered
# 
# sds_p_ss_total
# Total Score (Sum of 6 Factors): sds_p_ss_dims + sds_p_ss_sbd + sds_p_ss_da + sds_p_ss_swtd + sds_p_ss_does + sds_p_ss_shy; Validation: All items must be answered

# Combine the two single sleep items with the parent sleep sum scores, keeping
# every participant/event found in either table, and give the columns readable
# names. Renaming is done inside select(), so the column order is unchanged.
# NOTE(review): sleep_disturb holds SLEEPDISTURB2_P, which the item text above
# describes as time taken to fall asleep.
sleepSum <- sleepDis %>%
  full_join(PhysicalSum, by = c("SUBJECTKEY", "EVENTNAME")) %>%
  dplyr::select(
    SUBJECTKEY,
    EVENTNAME,
    sleep_hours = SLEEPDISTURB1_P,
    sleep_disturb = SLEEPDISTURB2_P,
    sleep_initiate_maintain = SDS_P_SS_DIMS,
    sleep_breath = SDS_P_SS_SBD,
    sleep_arousal = SDS_P_SS_DA,
    sleep_transition = SDS_P_SS_SWTD,
    sleep_somnolence = SDS_P_SS_DOES,
    sleep_hyperhydrosis = SDS_P_SS_SHY,
    sleep_total = SDS_P_SS_TOTAL
  )

# Summary of the sleep measures at baseline.
sleepSum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 9
_______________________
Column type frequency:
numeric 9
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
sleep_hours 5 1 1.72 0.81 1 1 2 2 5 ▇▆▂▁▁
sleep_disturb 5 1 1.93 0.98 1 1 2 2 5 ▇▇▂▁▁
sleep_initiate_maintain 5 1 11.76 3.75 7 9 11 13 35 ▇▃▁▁▁
sleep_breath 5 1 3.77 1.26 3 3 3 4 15 ▇▁▁▁▁
sleep_arousal 5 1 3.44 0.92 3 3 3 4 15 ▇▁▁▁▁
sleep_transition 32 1 8.18 2.63 6 6 7 9 30 ▇▁▁▁▁
sleep_somnolence 6 1 6.95 2.44 5 5 6 8 25 ▇▁▁▁▁
sleep_hyperhydrosis 5 1 2.44 1.18 2 2 2 2 10 ▇▁▁▁▁
sleep_total 33 1 36.54 8.24 26 31 35 40 126 ▇▁▁▁▁
# Summary of the sleep measures at the two-year follow-up.
sleepSum %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 9
_______________________
Column type frequency:
numeric 9
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
sleep_hours 74 0.99 1.99 0.87 1 1 2 2 5 ▆▇▃▁▁
sleep_disturb 74 0.99 2.05 1.05 1 1 2 3 5 ▇▇▃▁▁
sleep_initiate_maintain 74 0.99 12.04 3.77 7 9 11 14 34 ▇▃▁▁▁
sleep_breath 74 0.99 3.69 1.16 3 3 3 4 15 ▇▁▁▁▁
sleep_arousal 74 0.99 3.31 0.72 3 3 3 3 11 ▇▁▁▁▁
sleep_transition 75 0.99 7.80 2.43 6 6 7 9 28 ▇▁▁▁▁
sleep_somnolence 74 0.99 7.14 2.59 5 5 6 8 25 ▇▁▁▁▁
sleep_hyperhydrosis 74 0.99 2.34 0.99 2 2 2 2 10 ▇▁▁▁▁
sleep_total 75 0.99 36.33 8.06 26 31 34 40 105 ▇▂▁▁▁

2.5 behavioral

2.5.1 screen time

ABCD Youth Screen Time Survey abcd_stq01

Youth Screen Time Survey This measure includes customized questions about the overall amount of time that the youth spends using visual media, on a typical weekday and weekend day. Media activities assessed include: (1) Watching TV shows or movies; (2) Watching videos (such as YouTube); (3) Playing video games on a computer, console, phone or other device; (4) Texting on a cell phone, tablet, or computer; (5) Visiting social networking sites like Facebook, Twitter, Instagram; (6) Video chat. Seven response options were: none, < 30 minutes, 30 minutes, 1 hour, 2 hours, 3 hours, and 4+ hours.

# Youth Screen Time Survey, all events.
youthScreen <- read.csv(paste0(dataFold, "ABCD_STQ01_DATA_TABLE.csv")) %>%
  as_tibble()
# filter(EVENTNAME =="baseline_year_1_arm_1") 

#On a typical weekend/weekday, how many hours do you
#0 = None; .25 = < 30 minutes; 0.5 = 30 minutes; 1 = 1 hour; 2 = 2 hours; 3 = 3 hours; 4 = 4+ hours //Example: 1½ hours would be coded as 1 hour, rather than 2 hours.  
#How often do you play mature-rated video games (e.g., Call of Duty, Grand Theft Auto, Assassin's Creed, etc.)?
#How often do you watch R-rated movies?

# Total weekday and weekend screen time: row-wise sums over the columns whose
# names end in WKDY_Y / WKND_Y. rowSums() is called without na.rm, so a row
# with any missing item gets a missing sum (both sums are entirely missing at
# the two-year follow-up). The two rating items are renamed so that all four
# measures share the "_Screen" suffix used for selection below.
youthScreenAdded <- youthScreen %>%
  mutate(
    wkdySum_Screen = rowSums(dplyr::select(., ends_with("WKDY_Y"))),
    wkndSum_Screen = rowSums(dplyr::select(., ends_with("WKND_Y")))
  ) %>%
  rename(
    matureGames_Screen = SCREEN13_Y,
    matureMovies_Screen = SCREEN14_Y
  )

# Identifiers plus the four screen measures.
youthScreenSum <- dplyr::select(
  youthScreenAdded,
  SUBJECTKEY, EVENTNAME, ends_with("_Screen")
)

# Summary of the screen measures at baseline.
youthScreenSum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 4
_______________________
Column type frequency:
numeric 4
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
matureGames_Screen 20 1 0.57 0.87 0 0.00 0.0 1.00 3 ▇▃▁▁▁
matureMovies_Screen 21 1 0.38 0.64 0 0.00 0.0 1.00 3 ▇▃▁▁▁
wkdySum_Screen 38 1 3.46 3.10 0 1.25 2.5 4.75 24 ▇▂▁▁▁
wkndSum_Screen 43 1 4.62 3.63 0 2.00 3.5 6.25 24 ▇▃▁▁▁
# Summary of the screen measures at the two-year follow-up.
youthScreenSum %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 4
_______________________
Column type frequency:
numeric 4
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
matureGames_Screen 29 1 0.62 0.88 0 0 0 1 3 ▇▃▁▂▁
matureMovies_Screen 37 1 0.49 0.65 0 0 0 1 3 ▇▅▁▁▁
wkdySum_Screen 10414 0 NaN NA NA NA NA NA NA
wkndSum_Screen 10414 0 NaN NA NA NA NA NA NA

2.5.2 maternal substance use

#ABCD Developmental History Questionnaire
# This table names its event column VISIT; rename it to EVENTNAME so it
# matches the other tables.
DevHis <- read.csv(paste0(dataFold, "DHX01_DATA_TABLE.csv")) %>%
  as_tibble() %>%
  # filter(VISIT == "baseline_year_1_arm_1") %>%
  rename(EVENTNAME = VISIT)

#glimpse(DevHis)


# Summary of all DEVHX_8* items at baseline.
DevHis %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(starts_with("DEVHX_8")) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 83
_______________________
Column type frequency:
character 5
logical 10
numeric 68
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
DEVHX_8_RXNORM_MED1 0 1 0 190 10578 679 0
DEVHX_8_RXNORM_MED2 0 1 0 118 11691 130 0
DEVHX_8_RXNORM_MED3 0 1 0 79 11831 37 0
DEVHX_8_OTHER1_NAME_OTH 0 1 0 36 11854 19 0
DEVHX_8_OTHER3_NAME_OTH 0 1 0 30 11872 5 0

Variable type: logical

skim_variable n_missing complete_rate mean count
DEVHX_8_PRESCRIPT_YES 11876 0 NaN :
DEVHX_8_OTHER2_NAME_OTH 11876 0 NaN :
DEVHX_8_OTHER4_NAME_OTH 11876 0 NaN :
DEVHX_8_OTHER4_TIMES 11876 0 NaN :
DEVHX_8_OTHER4_AMT 11876 0 NaN :
DEVHX_8_OTHER4_UNIT 11876 0 NaN :
DEVHX_8_OTHER5_NAME_OTH 11876 0 NaN :
DEVHX_8_OTHER5_TIMES 11876 0 NaN :
DEVHX_8_OTHER5_AMT 11876 0 NaN :
DEVHX_8_OTHER5_UNIT 11876 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DEVHX_8_PRESCRIPT_MED 5 1.00 74.00 261.43 0.00 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_MED1_PRN 11245 0.05 36.69 187.31 0.00 0.00 0.0 1.00 999 ▇▁▁▁▁
DEVHX_8_MED1_TIMES 10806 0.09 1.26 0.75 0.00 1.00 1.0 1.00 10 ▇▁▁▁▁
DEVHX_8_MED1_TIMES_DK 11718 0.01 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED1_HOW_MUCH 10969 0.08 82.50 191.78 0.00 1.00 10.0 88.00 2000 ▇▁▁▁▁
DEVHX_8_MED1_HOW_MUCH_DK 11452 0.04 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED1_UNIT 11044 0.07 2.69 0.79 1.00 2.75 3.0 3.00 10 ▃▇▁▁▁
DEVHX_8_MED1_FU 11099 0.07 0.25 0.43 0.00 0.00 0.0 1.00 1 ▇▁▁▁▃
DEVHX_8_MED2_PRN 11709 0.01 36.19 186.43 0.00 0.00 0.0 1.00 999 ▇▁▁▁▁
DEVHX_8_MED2_TIMES 11730 0.01 1.40 0.77 0.00 1.00 1.0 2.00 6 ▇▂▁▁▁
DEVHX_8_MED2_TIMES_DK 11844 0.00 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED2_HOW_MUCH 11782 0.01 87.58 247.66 0.00 1.00 2.5 50.00 2000 ▇▁▁▁▁
DEVHX_8_MED2_HOW_MUCH_DK 11788 0.01 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED2_UNIT 11791 0.01 2.74 1.09 1.00 2.00 3.0 3.00 10 ▃▇▁▁▁
DEVHX_8_MED2_FU 11698 0.01 0.26 0.44 0.00 0.00 0.0 1.00 1 ▇▁▁▁▃
DEVHX_8_MED3_PRN 11846 0.00 0.27 0.45 0.00 0.00 0.0 0.75 1 ▇▁▁▁▃
DEVHX_8_MED3_TIMES 11819 0.00 1.30 0.80 0.00 1.00 1.0 1.00 4 ▁▇▂▁▁
DEVHX_8_MED3_TIMES_DK 11866 0.00 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED3_HOW_MUCH 11852 0.00 189.32 478.06 0.25 1.00 1.5 42.50 2000 ▇▁▁▁▁
DEVHX_8_MED3_HOW_MUCH_DK 11853 0.00 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MED3_UNIT 11856 0.00 2.55 0.76 1.00 2.00 3.0 3.00 3 ▂▁▂▁▇
DEVHX_8_TOBACCO 6 1.00 23.11 149.74 0.00 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_CIGS_PER_DAY 10453 0.12 8.35 6.44 0.00 4.00 6.0 10.00 80 ▇▁▁▁▁
DEVHX_8_CIGS_PER_DAY_DK 11689 0.02 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_ALCOHOL 6 1.00 57.22 231.63 0.00 0.00 0.0 1.00 999 ▇▁▁▁▁
DEVHX_8_ALCHOHOL_MAX 9334 0.21 2.40 1.50 0.00 1.00 2.0 3.00 20 ▇▁▁▁▁
DEVHX_8_ALCHOHOL_MAX_DK 11536 0.03 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_ALCHOHOL_AVG 9369 0.21 3.97 4.38 0.00 1.00 3.0 5.00 52 ▇▁▁▁▁
DEVHX_8_ALCHOHOL_AVG_DK 11501 0.03 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_ALCOHOL_EFFECTS 9512 0.20 2.08 1.24 0.00 1.00 2.0 2.00 20 ▇▁▁▁▁
DEVHX_8_ALCOHOL_EFFECTS_DK 11358 0.04 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_MARIJUANA 5 1.00 28.25 165.43 0.00 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_MARIJUANA_AMT 11387 0.04 2.09 2.49 0.00 1.00 2.0 3.00 40 ▇▁▁▁▁
DEVHX_8_MARIJUANA_AMT_DK 11685 0.02 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_COC_CRACK 5 1.00 23.74 152.14 0.00 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_COC_CRACK_AMT 11843 0.00 3.00 3.71 0.00 1.00 2.0 3.00 20 ▇▁▁▁▁
DEVHX_8_COC_CRACK_AMT_DK 11815 0.01 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_HER_MORPH 5 1.00 24.41 154.23 0.00 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_HER_MORPH_AMT 11873 0.00 2.67 1.15 2.00 2.00 2.0 3.00 4 ▇▁▁▁▃
DEVHX_8_HER_MORPH_AMT_DK 11855 0.00 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OXYCONT 5 1.00 25.08 156.29 0.00 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_OXYCONT_AMT 11861 0.00 2.13 1.19 0.00 1.00 2.0 3.00 4 ▂▇▇▇▃
DEVHX_8_OXYCONT_AMT_DK 11849 0.00 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER_DRUGS 5 1.00 32.07 176.08 0.00 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_OTHER1_NAME_2 11779 0.01 46.53 198.62 0.00 1.00 3.0 12.00 999 ▇▁▁▁▁
DEVHX_8_OTHER1_TIMES 11829 0.00 1.94 1.98 0.00 1.00 1.0 2.00 10 ▇▂▁▁▁
DEVHX_8_OTHER1_TIMES_DK 11824 0.00 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER1_AMT 11845 0.00 92.94 286.57 0.00 1.00 2.0 16.25 1500 ▇▁▁▁▁
DEVHX_8_OTHER1_AMT_DK 11808 0.01 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER1_UNIT 11846 0.00 3.20 2.11 1.00 1.25 3.0 3.00 7 ▆▇▁▁▃
DEVHX_8_OTHER2_NAME_2 11797 0.01 13.77 112.32 0.00 0.00 0.0 0.00 999 ▇▁▁▁▁
DEVHX_8_OTHER2_TIMES 11871 0.00 1.40 0.89 1.00 1.00 1.0 1.00 3 ▇▁▁▁▂
DEVHX_8_OTHER2_TIMES_DK 11869 0.00 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER2_AMT 11874 0.00 6.00 2.83 4.00 5.00 6.0 7.00 8 ▇▁▁▁▇
DEVHX_8_OTHER2_AMT_DK 11866 0.00 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER2_UNIT 11874 0.00 5.00 2.83 3.00 4.00 5.0 6.00 7 ▇▁▁▁▇
DEVHX_8_OTHER3_NAME_2 11868 0.00 3.00 5.26 0.00 0.00 0.0 3.50 12 ▇▁▁▁▂
DEVHX_8_OTHER3_TIMES 11874 0.00 2.00 1.41 1.00 1.50 2.0 2.50 3 ▇▁▁▁▇
DEVHX_8_OTHER3_TIMES_DK 11875 0.00 999.00 NA 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER3_AMT 11875 0.00 10.00 NA 10.00 10.00 10.0 10.00 10 ▁▁▇▁▁
DEVHX_8_OTHER3_AMT_DK 11874 0.00 999.00 0.00 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER3_UNIT 11875 0.00 3.00 NA 3.00 3.00 3.0 3.00 3 ▁▁▇▁▁
DEVHX_8_OTHER4_NAME_2 11873 0.00 0.33 0.58 0.00 0.00 0.0 0.50 1 ▇▁▁▁▃
DEVHX_8_OTHER4_TIMES_DK 11875 0.00 999.00 NA 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER4_AMT_DK 11875 0.00 999.00 NA 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER5_NAME_2 11875 0.00 1.00 NA 1.00 1.00 1.0 1.00 1 ▁▁▇▁▁
DEVHX_8_OTHER5_TIMES_DK 11875 0.00 999.00 NA 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
DEVHX_8_OTHER5_AMT_DK 11875 0.00 999.00 NA 999.00 999.00 999.0 999.00 999 ▁▁▇▁▁
# Same summary at the two-year follow-up (this table has no rows for that event).
DevHis %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(starts_with("DEVHX_8")) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 0
Number of columns 83
_______________________
Column type frequency:
character 5
logical 10
numeric 68
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
DEVHX_8_RXNORM_MED1 0 NaN NA NA 0 0 0
DEVHX_8_RXNORM_MED2 0 NaN NA NA 0 0 0
DEVHX_8_RXNORM_MED3 0 NaN NA NA 0 0 0
DEVHX_8_OTHER1_NAME_OTH 0 NaN NA NA 0 0 0
DEVHX_8_OTHER3_NAME_OTH 0 NaN NA NA 0 0 0

Variable type: logical

skim_variable n_missing complete_rate mean count
DEVHX_8_PRESCRIPT_YES 0 NaN NaN :
DEVHX_8_OTHER2_NAME_OTH 0 NaN NaN :
DEVHX_8_OTHER4_NAME_OTH 0 NaN NaN :
DEVHX_8_OTHER4_TIMES 0 NaN NaN :
DEVHX_8_OTHER4_AMT 0 NaN NaN :
DEVHX_8_OTHER4_UNIT 0 NaN NaN :
DEVHX_8_OTHER5_NAME_OTH 0 NaN NaN :
DEVHX_8_OTHER5_TIMES 0 NaN NaN :
DEVHX_8_OTHER5_AMT 0 NaN NaN :
DEVHX_8_OTHER5_UNIT 0 NaN NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DEVHX_8_PRESCRIPT_MED 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED1_PRN 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED1_TIMES 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED1_TIMES_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED1_HOW_MUCH 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED1_HOW_MUCH_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED1_UNIT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED1_FU 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED2_PRN 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED2_TIMES 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED2_TIMES_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED2_HOW_MUCH 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED2_HOW_MUCH_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED2_UNIT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED2_FU 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED3_PRN 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED3_TIMES 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED3_TIMES_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED3_HOW_MUCH 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED3_HOW_MUCH_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MED3_UNIT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_TOBACCO 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_CIGS_PER_DAY 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_CIGS_PER_DAY_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_ALCOHOL 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_ALCHOHOL_MAX 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_ALCHOHOL_MAX_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_ALCHOHOL_AVG 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_ALCHOHOL_AVG_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_ALCOHOL_EFFECTS 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_ALCOHOL_EFFECTS_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MARIJUANA 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MARIJUANA_AMT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_MARIJUANA_AMT_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_COC_CRACK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_COC_CRACK_AMT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_COC_CRACK_AMT_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_HER_MORPH 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_HER_MORPH_AMT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_HER_MORPH_AMT_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OXYCONT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OXYCONT_AMT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OXYCONT_AMT_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER_DRUGS 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER1_NAME_2 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER1_TIMES 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER1_TIMES_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER1_AMT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER1_AMT_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER1_UNIT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER2_NAME_2 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER2_TIMES 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER2_TIMES_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER2_AMT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER2_AMT_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER2_UNIT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER3_NAME_2 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER3_TIMES 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER3_TIMES_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER3_AMT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER3_AMT_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER3_UNIT 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER4_NAME_2 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER4_TIMES_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER4_AMT_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER5_NAME_2 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER5_TIMES_DK 0 NaN NaN NA NA NA NA NA NA
DEVHX_8_OTHER5_AMT_DK 0 NaN NaN NA NA NA NA NA NA
#devhx_8_tobacco
#Before knowing of pregnancy. Tobacco? How many times per day?/ ¿Cuántas veces al día?

#devhx_9_tobacco
#Knowing of pregnancy. Tobacco? How many times per day?/ ¿Cuántas veces al día?

#devhx_8_alcohol
#Before knowing of pregnancy. Alcohol? /¿Alcohol?

#devhx_9_alcohol
#Knowing of pregnancy. Alcohol? /¿Alcohol?

#devhx_8_marijuana
#Before knowing of pregnancy. Marijuana? /¿Marihuana?

#devhx_9_marijuana
#Knowing of pregnancy. Marijuana? /¿Marihuana?

# change name and replace 999 with na

# Maternal substance use before (DEVHX_8_*) and after (DEVHX_9_*) knowing of
# the pregnancy. The code 999 is recoded to NA in every numeric column first,
# then each item is stored as a factor; droplevels() removes unused levels.
momSubstanceUse <- DevHis %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 999))) %>%
  mutate(
    tobacco_before_preg = as.factor(DEVHX_8_TOBACCO),
    tobacco_after_preg = as.factor(DEVHX_9_TOBACCO),
    alcohol_before_preg = as.factor(DEVHX_8_ALCOHOL),
    alcohol_after_preg = as.factor(DEVHX_9_ALCOHOL),
    marijuana_before_preg = as.factor(DEVHX_8_MARIJUANA),
    marijuana_after_preg = as.factor(DEVHX_9_MARIJUANA)
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, ends_with("_preg")) %>%
  droplevels()

# Summary of the maternal substance-use factors at baseline.
momSubstanceUse %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 6
_______________________
Column type frequency:
factor 6
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
tobacco_before_preg 279 0.98 FALSE 2 0: 9987, 1: 1610
tobacco_after_preg 265 0.98 FALSE 2 0: 10991, 1: 620
alcohol_before_preg 683 0.94 FALSE 2 0: 8311, 1: 2882
alcohol_after_preg 293 0.98 FALSE 2 0: 11268, 1: 315
marijuana_before_preg 340 0.97 FALSE 2 0: 10849, 1: 687
marijuana_after_preg 277 0.98 FALSE 2 0: 11354, 1: 245
# Same summary at the two-year follow-up (no rows for that event).
momSubstanceUse %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 0
Number of columns 6
_______________________
Column type frequency:
factor 6
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
tobacco_before_preg 0 NaN FALSE 0 0: 0, 1: 0
tobacco_after_preg 0 NaN FALSE 0 0: 0, 1: 0
alcohol_before_preg 0 NaN FALSE 0 0: 0, 1: 0
alcohol_after_preg 0 NaN FALSE 0 0: 0, 1: 0
marijuana_before_preg 0 NaN FALSE 0 0: 0, 1: 0
marijuana_after_preg 0 NaN FALSE 0 0: 0, 1: 0

2.5.3 developmental adversity

# Summary of birth weight and the DEVHX_10/12/13/14 items at baseline.
DevHis %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(
    starts_with(c("BIRTH_WEIGHT", "DEVHX_10", "DEVHX_12", "DEVHX_13", "DEVHX_14"))
  ) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 30
_______________________
Column type frequency:
logical 3
numeric 27
________________________
Group variables None

Variable type: logical

skim_variable n_missing complete_rate mean count
DEVHX_10C3_C_P 11876 0 NaN :
DEVHX_10C3_A_P 11876 0 NaN :
DEVHX_10C3_B_P 11876 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
BIRTH_WEIGHT_LBS 517 0.96 6.59 1.47 1 6 7 8 14 ▁▆▇▁▁
BIRTH_WEIGHT_OZ 1241 0.90 7.16 4.35 0 4 7 11 15 ▇▇▇▆▅
DEVHX_10 9 1.00 39.22 191.65 -1 1 1 1 999 ▇▁▁▁▁
DEVHX_10A3_P 4 1.00 32.45 176.72 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10B3_P 4 1.00 27.22 162.52 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10C3_P 4 1.00 32.47 176.95 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10D3_P 4 1.00 27.95 164.71 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10E3_P 4 1.00 34.17 181.56 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10F3_P 4 1.00 26.26 159.81 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10G3_P 4 1.00 31.43 174.27 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10H3_P 4 1.00 39.29 193.99 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10I3_P 4 1.00 30.78 172.45 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10J3_P 4 1.00 33.92 180.68 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10K3_P 4 1.00 29.99 170.38 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10L3_P 4 1.00 26.61 160.80 0 0 0 0 999 ▇▁▁▁▁
DEVHX_10M3_P 4 1.00 28.95 167.33 0 0 0 0 999 ▇▁▁▁▁
DEVHX_12A_P 5 1.00 11.97 107.83 0 0 0 0 999 ▇▁▁▁▁
DEVHX_12_P 9668 0.19 17.10 109.30 1 3 4 6 999 ▇▁▁▁▁
DEVHX_13_3_P 5 1.00 12.58 109.70 0 0 0 1 999 ▇▁▁▁▁
DEVHX_14A3_P 5 1.00 26.71 161.06 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14B3_P 5 1.00 28.56 166.39 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14C3_P 5 1.00 25.04 156.03 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14D3_P 5 1.00 18.43 134.44 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14E3_P 5 1.00 24.23 153.16 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14F3_P 5 1.00 25.09 156.02 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14G3_P 5 1.00 17.42 130.77 0 0 0 0 999 ▇▁▁▁▁
DEVHX_14H3_P 6 1.00 34.36 182.00 0 0 0 0 999 ▇▁▁▁▁
# Same summary at the two-year follow-up (no rows for that event).
DevHis %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(
    starts_with(c("BIRTH_WEIGHT", "DEVHX_10", "DEVHX_12", "DEVHX_13", "DEVHX_14"))
  ) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 0
Number of columns 30
_______________________
Column type frequency:
logical 3
numeric 27
________________________
Group variables None

Variable type: logical

skim_variable n_missing complete_rate mean count
DEVHX_10C3_C_P 0 NaN NaN :
DEVHX_10C3_A_P 0 NaN NaN :
DEVHX_10C3_B_P 0 NaN NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
BIRTH_WEIGHT_LBS 0 NaN NaN NA NA NA NA NA NA
BIRTH_WEIGHT_OZ 0 NaN NaN NA NA NA NA NA NA
DEVHX_10 0 NaN NaN NA NA NA NA NA NA
DEVHX_10A3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10B3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10C3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10D3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10E3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10F3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10G3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10H3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10I3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10J3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10K3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10L3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_10M3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_12A_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_12_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_13_3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_14A3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_14B3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_14C3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_14D3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_14E3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_14F3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_14G3_P 0 NaN NaN NA NA NA NA NA NA
DEVHX_14H3_P 0 NaN NaN NA NA NA NA NA NA
#devhx_12a_p
#Was the child born prematurely? / ¿Nació el niño o la niña antes de tiempo?
# Build early-adversity summary scores from the developmental-history table.
# One row per input row; only SUBJECTKEY, EVENTNAME and the derived columns
# are kept. The derived columns keep the existing `deveplopment_` prefix (sic)
# because they are selected by that prefix below.
adversitySum <- DevHis %>%
  # Recode 999 to NA in every numeric column before any item is summed.
  mutate(across(where(is.numeric), ~ na_if(.x, 999))) %>%
  mutate(
    # Prematurity item (DEVHX_12A_P) stored as a factor.
    deveplopment_prematurity = as.factor(DEVHX_12A_P),
    # Row-wise sum of all DEVHX_14* items. rowSums() keeps its default
    # na.rm = FALSE, so a row with any missing item gets NA.
    deveplopment_birth_complications =
      rowSums(dplyr::select(., starts_with("DEVHX_14"))),
    # deveplopment_birth_kg = BIRTH_WEIGHT_LBS * 0.453592, # all na???
    # Row-wise sum over the column range DEVHX_10A3_P ... DEVHX_10L3_P.
    # The range is positional, so it relies on the column order of DevHis.
    deveplopment_pregnancy_complications =
      rowSums(dplyr::select(., DEVHX_10A3_P:DEVHX_10L3_P))
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, starts_with("deveplopment_"))

# Skim the derived adversity scores at baseline (ID and event columns dropped).
adversitySum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 3
_______________________
Column type frequency:
factor 1
numeric 2
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
deveplopment_prematurity 145 0.99 FALSE 2 0: 9523, 1: 2208

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
deveplopment_birth_complications 760 0.94 0.37 0.75 0 0 0 1 8 ▇▁▁▁▁
deveplopment_pregnancy_complications 743 0.94 0.61 1.02 0 0 0 1 12 ▇▁▁▁▁
# Same summary for rows tagged as the 2-year follow-up event.
adversitySum %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 0
Number of columns 3
_______________________
Column type frequency:
factor 1
numeric 2
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
deveplopment_prematurity 0 NaN FALSE 0 0: 0, 1: 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
deveplopment_birth_complications 0 NaN NaN NA NA NA NA NA NA
deveplopment_pregnancy_complications 0 NaN NaN NA NA NA NA NA NA

2.5.4 brain trauma

most events are quite rare.

# Read the ABCD_OTBI01 table from dataFold and keep it as a tibble.
brainTruma <- paste0(dataFold, "ABCD_OTBI01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Skim the baseline rows, leaving out the first eight columns.
brainTruma %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-(1:8)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 39
_______________________
Column type frequency:
logical 3
numeric 36
________________________
Group variables None

Variable type: logical

skim_variable n_missing complete_rate mean count
TBI_8I 11876 0 NaN :
TBI_8K 11876 0 NaN :
TBI_8L 11876 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
TBI_SELECT_LANGUAGE___1 0 1.00 0.05 0.23 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_1 4 1.00 0.12 0.33 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_1B 10434 0.12 0.07 0.28 0.0 0.00 0.0 0.00 3 ▇▁▁▁▁
TBI_1C 10436 0.12 0.17 0.37 0.0 0.00 0.0 0.00 1 ▇▁▁▁▂
TBI_1D 10435 0.12 4.98 2.63 0.0 3.00 5.0 7.00 11 ▇▇▇▇▃
TBI_2 4 1.00 0.02 0.12 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_2B 11688 0.02 0.04 0.19 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_2C 11688 0.02 0.12 0.33 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_2D 11688 0.02 6.61 2.18 0.0 5.00 7.0 8.00 10 ▁▂▅▇▃
TBI_3 4 1.00 0.12 0.33 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_3B 10396 0.12 0.04 0.21 0.0 0.00 0.0 0.00 3 ▇▁▁▁▁
TBI_3C 10397 0.12 0.16 0.37 0.0 0.00 0.0 0.00 1 ▇▁▁▁▂
TBI_3D 10398 0.12 5.86 2.70 0.0 4.00 6.0 8.00 11 ▅▅▆▇▅
TBI_4 4 1.00 0.00 0.07 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_4B 11817 0.00 0.02 0.13 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_4C 11817 0.00 0.17 0.38 0.0 0.00 0.0 0.00 1 ▇▁▁▁▂
TBI_4D 11817 0.00 6.53 2.44 0.0 5.00 7.0 8.00 10 ▂▂▅▇▅
TBI_5 4 1.00 0.00 0.03 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_5B 11866 0.00 0.00 0.00 0.0 0.00 0.0 0.00 0 ▁▁▇▁▁
TBI_5C 11866 0.00 0.00 0.00 0.0 0.00 0.0 0.00 0 ▁▁▇▁▁
TBI_5D 11866 0.00 5.10 2.91 1.5 2.25 5.5 7.75 9 ▇▁▃▂▆
TBI_6O 4 1.00 0.00 0.03 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_6P 11867 0.00 1.67 1.12 1.0 1.00 1.0 2.00 4 ▇▁▁▁▁
TBI_6Q 11867 0.00 1.78 1.20 0.0 1.00 2.0 2.00 4 ▂▇▇▂▂
TBI_6R 11867 0.00 0.00 0.00 0.0 0.00 0.0 0.00 0 ▁▁▇▁▁
TBI_6S 11867 0.00 4.56 3.71 1.0 1.00 3.0 8.00 10 ▇▂▂▂▃
TBI_7A 4 1.00 0.01 0.09 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_7C1 11769 0.01 0.07 0.37 0.0 0.00 0.0 0.00 3 ▇▁▁▁▁
TBL_7C2 11773 0.01 0.08 0.27 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_7E 11769 0.01 4.50 3.63 0.0 1.00 5.0 8.00 10 ▇▁▂▅▃
TBI_7F 11769 0.01 5.62 4.11 0.0 0.50 8.0 9.00 10 ▆▂▁▂▇
TBI_7G 11769 0.01 0.02 0.14 0.0 0.00 0.0 0.00 1 ▇▁▁▁▁
TBI_7I 11874 0.00 1.00 0.00 1.0 1.00 1.0 1.00 1 ▁▁▇▁▁
TBI_7K 11874 0.00 4.00 5.66 0.0 2.00 4.0 6.00 8 ▇▁▁▁▇
TBI_7L 11874 0.00 4.00 5.66 0.0 2.00 4.0 6.00 8 ▇▁▁▁▇
TBI_8G 11874 0.00 0.00 0.00 0.0 0.00 0.0 0.00 0 ▁▁▇▁▁
# Same summary for rows tagged as the 2-year follow-up event.
brainTruma %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-(1:8)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 0
Number of columns 39
_______________________
Column type frequency:
logical 3
numeric 36
________________________
Group variables None

Variable type: logical

skim_variable n_missing complete_rate mean count
TBI_8I 0 NaN NaN :
TBI_8K 0 NaN NaN :
TBI_8L 0 NaN NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
TBI_SELECT_LANGUAGE___1 0 NaN NaN NA NA NA NA NA NA
TBI_1 0 NaN NaN NA NA NA NA NA NA
TBI_1B 0 NaN NaN NA NA NA NA NA NA
TBI_1C 0 NaN NaN NA NA NA NA NA NA
TBI_1D 0 NaN NaN NA NA NA NA NA NA
TBI_2 0 NaN NaN NA NA NA NA NA NA
TBI_2B 0 NaN NaN NA NA NA NA NA NA
TBI_2C 0 NaN NaN NA NA NA NA NA NA
TBI_2D 0 NaN NaN NA NA NA NA NA NA
TBI_3 0 NaN NaN NA NA NA NA NA NA
TBI_3B 0 NaN NaN NA NA NA NA NA NA
TBI_3C 0 NaN NaN NA NA NA NA NA NA
TBI_3D 0 NaN NaN NA NA NA NA NA NA
TBI_4 0 NaN NaN NA NA NA NA NA NA
TBI_4B 0 NaN NaN NA NA NA NA NA NA
TBI_4C 0 NaN NaN NA NA NA NA NA NA
TBI_4D 0 NaN NaN NA NA NA NA NA NA
TBI_5 0 NaN NaN NA NA NA NA NA NA
TBI_5B 0 NaN NaN NA NA NA NA NA NA
TBI_5C 0 NaN NaN NA NA NA NA NA NA
TBI_5D 0 NaN NaN NA NA NA NA NA NA
TBI_6O 0 NaN NaN NA NA NA NA NA NA
TBI_6P 0 NaN NaN NA NA NA NA NA NA
TBI_6Q 0 NaN NaN NA NA NA NA NA NA
TBI_6R 0 NaN NaN NA NA NA NA NA NA
TBI_6S 0 NaN NaN NA NA NA NA NA NA
TBI_7A 0 NaN NaN NA NA NA NA NA NA
TBI_7C1 0 NaN NaN NA NA NA NA NA NA
TBL_7C2 0 NaN NaN NA NA NA NA NA NA
TBI_7E 0 NaN NaN NA NA NA NA NA NA
TBI_7F 0 NaN NaN NA NA NA NA NA NA
TBI_7G 0 NaN NaN NA NA NA NA NA NA
TBI_7I 0 NaN NaN NA NA NA NA NA NA
TBI_7K 0 NaN NaN NA NA NA NA NA NA
TBI_7L 0 NaN NaN NA NA NA NA NA NA
TBI_8G 0 NaN NaN NA NA NA NA NA NA

2.6 culture

2.6.1 bilingual

References: https://www.nature.com/articles/s41562-019-0609-3 and https://github.com/anthonystevendick/bilingual_abcd/blob/master/bilingual_analysis.r

accult_q1_y: How well do you speak English?
1 = Poor; 2 = Fair; 3 = Good; 4 = Excellent

accult_q2_y: Besides English, do you speak or understand another language or dialect? If child asks about languages learned in school, the RA should state: That’s OK, as long as it is a language or dialect that you speak or understand.
1 = Poor (Mal); 2 = Fair (Regular); 3 = Good (Bien); 4 = Excellent (Excelente); 777 = Refused (Niego contestar); 999 = Don’t Know (No sé)
(Note: the code below treats accult_q2_y as 0 = not bilingual and 1 = bilingual, so this Poor–Excellent scale may belong to a different item; check the data dictionary.)

accult_q4_y: What language do you speak with most of your friends?
1 = (Other language) all the time; 2 = (Other language) most of the time; 3 = (Other language) and English equally; 4 = English most of the time; 5 = English all the time

accult_q5_y: What language do you speak with most of your family?
1 = (Other language) all the time; 2 = (Other language) most of the time; 3 = (Other language) and English equally; 4 = English most of the time; 5 = English all the time

# Read the YACC01 table from dataFold and keep it as a tibble.
# All events are kept; the baseline-only filter below stays disabled.
bilingual <- paste0(dataFold, "YACC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()
# %>% filter(EVENTNAME =="baseline_year_1_arm_1")

#bilingual_status
# #recode the accult_q2_y variable into a binary "Bilingual Status", 0 = not bilingual; 1 = bilingual
# 
# bilingual_status <- biLingual$ACCULT_Q2_Y
# sum(is.na(bilingual_status))

#bilingual_degree
# #dimension a 'bilingual degree' variable, where 1 = participant said they were bilingual, and they speak the other language with friends all the time, most of the time,
# #or equally, OR they speak the other language with family all the time, most of the time, or equally.
# 
# bilingual_degree <- ifelse(bilingual_status == 0, 0, ifelse(bilingual_status == 1 & (as.numeric(accult_q4_y) <= 3 | as.numeric(accult_q5_y) <= 3), 1, NA))
# count(bilingual_degree) #check the data
# sum(is.na(bilingual_degree))
#### here I change it such that 0 = non-bilingual, 1 = bilingual who use (Other language) < English, 2 = bilingual who use (Other language) >= English 

#bilingual_use
# 
# #dimension a continuous 'bilingual use' variable, and reverse-score so that if participants speak the other language with friends all the time, most of the time...,
# #they will receive high scores on this measure (range 0-8, with 8 indicating a high-degree of other language use)
# 
# bilingual_use<-10-(as.numeric(abcd_subset$accult_q4_y)+as.numeric(abcd_subset$accult_q5_y))
# sum(is.na(bilingual_use))
####  here I change it such that 0 = non-bilingual, and bilingual participants score 11 - (accult_q4_y + accult_q5_y), i.e. 1-9, with higher scores indicating more use of the other language


# Derive three bilingualism measures from the acculturation items.
#
# bilingual_status: ACCULT_Q2_Y as a factor, with the non-response codes
#   777 (refused) and 999 (don't know) both set to NA so that neither can
#   turn into a factor level of its own.
# bilingual_degree: 0 when status is 0; 1 when status is 1 and the answer to
#   ACCULT_Q4_Y (friends) or ACCULT_Q5_Y (family) is <= 3 (other language used
#   at least as much as English); 2 when status is 1 and that test is FALSE
#   (both answers > 3, i.e. mostly English); NA otherwise.
#   NOTE(review): the note earlier in this chunk describes levels 1 and 2 the
#   other way round -- confirm which coding is intended.
# bilingual_use: 0 when status is 0, otherwise 11 - (Q4 + Q5); with Q4 and Q5
#   coded 1-5 this runs from 1 to 9, higher = more use of the other language.
bilingualAdded <- bilingual %>%
  mutate(
    bilingual_status = factor(
      ifelse(ACCULT_Q2_Y %in% c(777, 999), NA, ACCULT_Q2_Y)
    ),
    bilingual_degree = factor(ifelse(
      bilingual_status == 0, 0,
      ifelse(
        bilingual_status == 1 &
          (as.numeric(ACCULT_Q4_Y) <= 3 | as.numeric(ACCULT_Q5_Y) <= 3), 1,
        ifelse(
          bilingual_status == 1 &
            (as.numeric(ACCULT_Q4_Y) > 3 | as.numeric(ACCULT_Q5_Y) > 3), 2,
          NA
        )
      )
    )),
    bilingual_use = ifelse(
      bilingual_status == 0, 0,
      11 - (as.numeric(ACCULT_Q4_Y) + as.numeric(ACCULT_Q5_Y))
    )
  )

# Keep only the identifiers and the derived bilingual_* columns.
bilingualSum <- bilingualAdded %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, starts_with("bilingual_"))

# Skim the derived bilingual measures at baseline.
bilingualSum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 3
_______________________
Column type frequency:
factor 2
numeric 1
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
bilingual_status 82 0.99 FALSE 2 0: 7357, 1: 4437
bilingual_degree 82 0.99 FALSE 3 0: 7357, 2: 2753, 1: 1684

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bilingual_use 82 0.99 1.02 1.7 0 0 0 1 9 ▇▂▁▁▁
# Skim the derived bilingual measures at the 2-year follow-up.
bilingualSum %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 3
_______________________
Column type frequency:
factor 2
numeric 1
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
bilingual_status 72 0.99 FALSE 2 0: 6246, 1: 4096
bilingual_degree 72 0.99 FALSE 3 0: 6246, 2: 2726, 1: 1370

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bilingual_use 72 0.99 1.01 1.65 0 0 0 1 9 ▇▂▁▁▁

2.7 social demographics

Race/ethnicity is taken from ACS. Variables considered in this section: family income, family type, household size, parents’ work status (# demo_prnt_empl_v2; a bit too much to include), parents’ education, and the sum of economic insecurities (sumEcon_insecurities).

2.7.1 ABCD Parent Demographics Survey

# Read the PDEM02 (parent demographics survey) table from dataFold as a tibble.
demograp <- paste0(dataFold, "PDEM02_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Skim the baseline rows, leaving out the first eight columns.
demograp %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-(1:8)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 124
_______________________
Column type frequency:
character 2
logical 3
numeric 119
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
DEMO_RACE_NOTES_V2 0 1 0 30 11790 43 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: logical

skim_variable n_missing complete_rate mean count
DEMO_YEARS_US_V2 11876 0 NaN :
DEMO_RELIG2_V2 11876 0 NaN :
DEMO_YRS_2_2 11876 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DEMO_PRIM 0 1.00 1.23 0.67 1 1.00 1.0 1.00 5 ▇▁▁▁▁
DEMO_BRTHDAT_V2 11 1.00 9.48 0.51 8 9.00 9.0 10.00 11 ▁▇▁▇▁
DEMO_ED_V2 2 1.00 4.21 0.79 0 4.00 4.0 5.00 12 ▁▇▅▁▁
DEMO_ADOPT_AGEX_V2 11602 0.02 2.18 2.43 0 0.00 1.0 3.00 10 ▇▂▁▁▁
DEMO_ADOPT_AGEX_V2_BL_DK 11872 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEMO_SEX_V2 0 1.00 1.48 0.50 1 1.00 1.0 2.00 3 ▇▁▇▁▁
DEMO_GENDER_ID_V2 2 1.00 2.20 26.22 1 1.00 1.0 2.00 999 ▇▁▁▁▁
DEMO_RACE_A_P___10 0 1.00 0.74 0.44 0 0.00 1.0 1.00 1 ▃▁▁▁▇
DEMO_RACE_A_P___11 0 1.00 0.21 0.41 0 0.00 0.0 0.00 1 ▇▁▁▁▂
DEMO_RACE_A_P___12 0 1.00 0.03 0.18 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___13 0 1.00 0.00 0.02 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___14 0 1.00 0.00 0.04 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___15 0 1.00 0.00 0.01 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___16 0 1.00 0.00 0.03 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___17 0 1.00 0.00 0.06 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___18 0 1.00 0.01 0.10 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___19 0 1.00 0.02 0.13 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___20 0 1.00 0.01 0.12 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___21 0 1.00 0.01 0.08 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___22 0 1.00 0.01 0.09 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___23 0 1.00 0.01 0.07 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___24 0 1.00 0.01 0.09 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___25 0 1.00 0.07 0.25 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___77 0 1.00 0.00 0.07 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_RACE_A_P___99 0 1.00 0.01 0.09 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_ETHN_V2 10 1.00 12.93 101.50 1 2.00 2.0 2.00 999 ▇▁▁▁▁
DEMO_ETHN2_V2 9477 0.20 34.04 124.33 10 13.00 14.0 20.00 999 ▇▁▁▁▁
DEMO_ORIGIN_V2 2 1.00 187.28 31.08 1 189.00 189.0 189.00 999 ▇▁▁▁▁
DEMO_YEARS_US_V2_DK 11857 0.00 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEMO_RELIG_V2 4 1.00 52.62 197.17 1 4.00 11.0 17.00 999 ▇▁▁▁▁
DEMO_PRNT_AGE_V2 91 0.99 39.96 6.84 23 35.00 40.0 44.00 80 ▃▇▂▁▁
DEMO_PRNT_AGE_V2_BL_REFUSE 11789 0.01 863.76 108.95 777 777.00 777.0 999.00 999 ▇▁▁▁▅
DEMO_PRNT_GENDER_ID_V2 2 1.00 2.24 16.92 1 2.00 2.0 2.00 999 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___10 0 1.00 0.73 0.44 0 0.00 1.0 1.00 1 ▃▁▁▁▇
DEMO_PRNT_RACE_A_V2___11 0 1.00 0.17 0.38 0 0.00 0.0 0.00 1 ▇▁▁▁▂
DEMO_PRNT_RACE_A_V2___12 0 1.00 0.03 0.16 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___13 0 1.00 0.00 0.01 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___14 0 1.00 0.00 0.04 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___15 0 1.00 0.00 0.01 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___16 0 1.00 0.00 0.02 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___17 0 1.00 0.00 0.05 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___18 0 1.00 0.01 0.08 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___19 0 1.00 0.01 0.11 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___20 0 1.00 0.01 0.10 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___21 0 1.00 0.01 0.07 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___22 0 1.00 0.01 0.07 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___23 0 1.00 0.00 0.05 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___24 0 1.00 0.01 0.07 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___25 0 1.00 0.06 0.24 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___77 0 1.00 0.01 0.08 0 0.00 0.0 0.00 1 ▇▁▁▁▁
DEMO_PRNT_RACE_A_V2___99 0 1.00 0.01 0.11 0 0.00 0.0 0.00 1 ▇▁▁▁▁
NAAS_ID 11470 0.03 100.79 297.31 1 2.00 2.0 4.00 999 ▇▁▁▁▁
NAAS_MOM_ID 11470 0.03 155.13 358.70 1 2.00 4.0 5.00 999 ▇▁▁▁▂
NAAS_ID_DAD 11470 0.03 268.13 440.53 1 2.00 5.0 999.00 999 ▇▁▁▁▃
NAAS_BIRTHPLACE 11470 0.03 249.36 429.07 1 5.00 5.0 5.00 999 ▇▁▁▁▂
NAAS_RAISED 11470 0.03 227.40 415.23 1 5.00 5.0 5.00 999 ▇▁▁▁▂
NAAS_COMM_CONTACT 11470 0.03 209.88 403.54 1 4.00 5.0 5.00 999 ▇▁▁▁▂
NAAS_PRIDE 11470 0.03 115.17 316.32 1 1.00 2.0 3.00 999 ▇▁▁▁▁
NAAS_SELF_RATING 11470 0.03 59.91 230.42 1 3.00 4.0 4.00 999 ▇▁▁▁▁
NAAS_TRADITIONS 11470 0.03 53.31 215.53 1 4.00 5.0 5.00 999 ▇▁▁▁▁
DEMO_PRNT_ETHN_V2 2 1.00 7.28 71.21 1 2.00 2.0 2.00 999 ▇▁▁▁▁
DEMO_PRNT_ETHN2_V2 9877 0.17 21.38 62.54 10 13.00 14.0 20.00 999 ▇▁▁▁▁
DEMO_PRNT_16 2 1.00 0.32 0.47 0 0.00 0.0 1.00 1 ▇▁▁▁▃
DEMO_PRNT_16A 3816 0.68 0.99 0.11 0 1.00 1.0 1.00 1 ▁▁▁▁▇
DEMO_PRNT_ORIGIN_V2 7964 0.33 142.06 62.37 1 111.00 185.0 189.00 999 ▇▁▁▁▁
DEMO_BIOFATHER_V2 7964 0.33 149.63 101.31 1 111.00 186.0 189.00 999 ▇▁▁▁▁
DEMO_BIOMOTHER_V2 7964 0.33 149.34 79.81 1 111.00 189.0 189.00 999 ▇▁▁▁▁
DEMO_MATGRANDM_V2 7964 0.33 137.67 102.20 1 82.00 125.0 189.00 999 ▇▁▁▁▁
DEMO_MATGRANDF_V2 7964 0.33 140.86 115.27 1 82.00 125.0 189.00 999 ▇▁▁▁▁
DEMO_PATGRANDM_V2 7964 0.33 149.52 144.13 1 81.00 125.0 189.00 999 ▇▁▁▁▁
DEMO_PATGRANDF_V2 7964 0.33 154.45 157.92 1 82.00 125.0 189.00 999 ▇▁▁▁▁
DEMO_PRNT_YEARS_US_V2 1947 0.84 35.90 10.77 1 31.00 38.0 43.00 100 ▂▇▅▁▁
DEMO_PRNT_YEARS_US_V2_DK 11745 0.01 999.00 0.00 999 999.00 999.0 999.00 999 ▁▁▇▁▁
DEMO_PRNT_MARITAL_V2 2 1.00 8.23 68.70 1 1.00 1.0 3.00 777 ▇▁▁▁▁
DEMO_PRNT_ED_V2 0 1.00 17.68 28.88 1 15.00 18.0 19.00 777 ▇▁▁▁▁
DEMO_PRNT_EMPL_V2 2 1.00 5.97 52.17 1 1.00 1.0 5.00 777 ▇▁▁▁▁
DEMO_PRNT_EMPL_TIME 3692 0.69 1.27 0.44 1 1.00 1.0 2.00 2 ▇▁▁▁▃
DEMO_PRNT_INCOME_V2 2 1.00 84.95 253.47 1 3.00 6.0 8.00 999 ▇▁▁▁▁
DEMO_PRNT_PRTNR_V2 2 1.00 9.23 78.55 1 1.00 1.0 1.00 777 ▇▁▁▁▁
DEMO_PRNT_PRTNR_BIO 2397 0.80 2.53 32.50 1 1.00 1.0 1.00 999 ▇▁▁▁▁
DEMO_PRNT_PRTNR_ADOPT 10095 0.15 0.20 0.40 0 0.00 0.0 0.00 1 ▇▁▁▁▂
DEMO_PRTNR_ED_V2 2400 0.80 22.91 78.49 0 15.00 18.0 18.00 999 ▇▁▁▁▁
DEMO_PRTNR_EMPL_V2 2453 0.79 11.16 90.79 1 1.00 1.0 1.00 999 ▇▁▁▁▁
DEMO_PRTNR_EMPL_TIME 3664 0.69 1.06 0.23 1 1.00 1.0 1.00 2 ▇▁▁▁▁
DEMO_PRTNR_INCOME_V2 2396 0.80 95.59 269.83 1 6.00 7.0 9.00 999 ▇▁▁▁▁
DEMO_COMB_INCOME_V2 2 1.00 82.51 248.28 1 6.00 8.0 9.00 999 ▇▁▁▁▁
DEMO_FAM_EXP1_V2 2 1.00 4.92 61.14 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP2_V2 2 1.00 2.94 47.21 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP3_V2 2 1.00 3.97 54.63 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP4_V2 3 1.00 1.98 39.01 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP5_V2 3 1.00 2.74 45.58 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP6_V2 2 1.00 2.54 43.88 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_FAM_EXP7_V2 2 1.00 3.18 48.78 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_ROSTER_V2 279 0.98 4.72 1.77 0 4.00 4.0 5.00 77 ▇▁▁▁▁
DEMO_ROSTER_V2_REFUSE 11598 0.02 850.47 104.65 777 777.00 777.0 999.00 999 ▇▁▁▁▃
FAM_ROSTER_2C_V2 575 0.95 2.39 2.82 1 1.00 1.0 3.00 15 ▇▁▁▁▁
FAM_ROSTER_3C_V2 1044 0.91 3.54 1.96 1 3.00 3.0 3.00 15 ▇▁▁▁▁
FAM_ROSTER_4C_V2 2492 0.79 3.50 1.87 1 3.00 3.0 3.00 15 ▇▁▁▁▁
FAM_ROSTER_5C_V2 6270 0.47 4.02 2.69 1 3.00 3.0 3.00 15 ▇▁▁▁▁
FAM_ROSTER_6C_V2 9108 0.23 4.56 3.22 1 3.00 3.0 4.00 15 ▇▁▁▁▁
FAM_ROSTER_7C_V2 10716 0.10 5.10 3.64 1 3.00 3.0 6.00 15 ▇▂▁▁▁
FAM_ROSTER_8C_V2 11376 0.04 5.23 3.67 1 3.00 3.0 7.00 15 ▇▂▂▁▁
FAM_ROSTER_9C_V2 11645 0.02 5.44 3.73 2 3.00 4.0 7.00 15 ▇▁▁▁▁
FAM_ROSTER_10C_V2 11748 0.01 5.00 3.48 1 3.00 3.5 4.25 14 ▇▅▁▁▂
FAM_ROSTER_11C_V2 11825 0.00 4.98 3.08 3 3.00 4.0 5.00 14 ▇▁▁▁▁
FAM_ROSTER_12C_V2 11845 0.00 5.03 3.19 3 3.00 4.0 5.00 14 ▇▁▁▁▁
FAM_ROSTER_13C_V2 11862 0.00 5.50 3.65 3 3.25 4.0 5.50 14 ▇▁▁▁▂
FAM_ROSTER_14C_V2 11865 0.00 6.18 3.87 3 4.00 4.0 7.00 14 ▇▁▁▁▂
FAM_ROSTER_15C_V2 11868 0.00 8.12 5.28 1 4.00 7.0 13.25 15 ▂▇▂▁▇
DEMO_CHILD_TIME_V2 3 1.00 5.41 63.95 0 0.00 0.0 0.00 777 ▇▁▁▁▁
DEMO_CHILD_TIME2_V2 10629 0.11 52.67 38.69 0 24.00 48.0 84.00 168 ▇▆▆▁▁
DEMO_CHILD_TIME2_V2_DK 11795 0.01 960.63 84.46 777 999.00 999.0 999.00 999 ▂▁▁▁▇
DEMO_CHILD_TIME3_V2 10574 0.11 2.29 1.99 1 1.00 1.0 4.00 8 ▇▁▃▁▁
DEMO_YRS_1 3 1.00 20.85 129.15 0 0.00 1.0 3.00 999 ▇▁▁▁▁
DEMO_YRS_2 5 1.00 35.91 173.30 1 2.00 3.0 4.00 999 ▇▁▁▁▁
DEMO_YRS_2A_2 5430 0.54 85.48 279.03 0 0.00 0.0 1.00 999 ▇▁▁▁▁
DEMO_YRS_2B_2 5432 0.54 128.41 333.85 0 0.00 0.0 1.00 999 ▇▁▁▁▁
DEMO_YRS_2_NO_DISPLAY___1 0 1.00 0.46 0.50 0 0.00 0.0 1.00 1 ▇▁▁▁▇
DEMO_RACE_A_P___0 0 1.00 0.00 0.01 0 0.00 0.0 0.00 1 ▇▁▁▁▁
# Same summary for rows tagged as the 2-year follow-up event.
demograp %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-(1:8)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 0
Number of columns 124
_______________________
Column type frequency:
character 2
logical 3
numeric 119
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
DEMO_RACE_NOTES_V2 0 NaN NA NA 0 0 0
EVENTNAME 0 NaN NA NA 0 0 0

Variable type: logical

skim_variable n_missing complete_rate mean count
DEMO_YEARS_US_V2 0 NaN NaN :
DEMO_RELIG2_V2 0 NaN NaN :
DEMO_YRS_2_2 0 NaN NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DEMO_PRIM 0 NaN NaN NA NA NA NA NA NA
DEMO_BRTHDAT_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_ED_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_ADOPT_AGEX_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_ADOPT_AGEX_V2_BL_DK 0 NaN NaN NA NA NA NA NA NA
DEMO_SEX_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_GENDER_ID_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___10 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___11 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___12 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___13 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___14 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___15 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___16 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___17 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___18 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___19 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___20 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___21 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___22 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___23 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___24 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___25 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___77 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___99 0 NaN NaN NA NA NA NA NA NA
DEMO_ETHN_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_ETHN2_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_ORIGIN_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_YEARS_US_V2_DK 0 NaN NaN NA NA NA NA NA NA
DEMO_RELIG_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_AGE_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_AGE_V2_BL_REFUSE 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_GENDER_ID_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___10 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___11 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___12 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___13 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___14 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___15 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___16 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___17 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___18 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___19 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___20 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___21 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___22 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___23 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___24 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___25 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___77 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_RACE_A_V2___99 0 NaN NaN NA NA NA NA NA NA
NAAS_ID 0 NaN NaN NA NA NA NA NA NA
NAAS_MOM_ID 0 NaN NaN NA NA NA NA NA NA
NAAS_ID_DAD 0 NaN NaN NA NA NA NA NA NA
NAAS_BIRTHPLACE 0 NaN NaN NA NA NA NA NA NA
NAAS_RAISED 0 NaN NaN NA NA NA NA NA NA
NAAS_COMM_CONTACT 0 NaN NaN NA NA NA NA NA NA
NAAS_PRIDE 0 NaN NaN NA NA NA NA NA NA
NAAS_SELF_RATING 0 NaN NaN NA NA NA NA NA NA
NAAS_TRADITIONS 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_ETHN_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_ETHN2_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_16 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_16A 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_ORIGIN_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_BIOFATHER_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_BIOMOTHER_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_MATGRANDM_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_MATGRANDF_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PATGRANDM_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PATGRANDF_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_YEARS_US_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_YEARS_US_V2_DK 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_MARITAL_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_ED_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_EMPL_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_EMPL_TIME 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_INCOME_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_PRTNR_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_PRTNR_BIO 0 NaN NaN NA NA NA NA NA NA
DEMO_PRNT_PRTNR_ADOPT 0 NaN NaN NA NA NA NA NA NA
DEMO_PRTNR_ED_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRTNR_EMPL_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_PRTNR_EMPL_TIME 0 NaN NaN NA NA NA NA NA NA
DEMO_PRTNR_INCOME_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_COMB_INCOME_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_FAM_EXP1_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_FAM_EXP2_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_FAM_EXP3_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_FAM_EXP4_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_FAM_EXP5_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_FAM_EXP6_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_FAM_EXP7_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_ROSTER_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_ROSTER_V2_REFUSE 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_2C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_3C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_4C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_5C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_6C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_7C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_8C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_9C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_10C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_11C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_12C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_13C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_14C_V2 0 NaN NaN NA NA NA NA NA NA
FAM_ROSTER_15C_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_CHILD_TIME_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_CHILD_TIME2_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_CHILD_TIME2_V2_DK 0 NaN NaN NA NA NA NA NA NA
DEMO_CHILD_TIME3_V2 0 NaN NaN NA NA NA NA NA NA
DEMO_YRS_1 0 NaN NaN NA NA NA NA NA NA
DEMO_YRS_2 0 NaN NaN NA NA NA NA NA NA
DEMO_YRS_2A_2 0 NaN NaN NA NA NA NA NA NA
DEMO_YRS_2B_2 0 NaN NaN NA NA NA NA NA NA
DEMO_YRS_2_NO_DISPLAY___1 0 NaN NaN NA NA NA NA NA NA
DEMO_RACE_A_P___0 0 NaN NaN NA NA NA NA NA NA
#demo_prnt_marital_v2 
#marital 
#Are you now married, widowed, divorced, separated, never married or living with a partner? ¿Usted actualmente está casado(a), viudo(a), divorciado(a), separado(a), nunca se ha casado(a) o vive con una pareja?
#1 = Married Casado(a) ; 2 = Widowed Viudo(a) ; 3 = Divorced Divorciado(a) ; 4 = Separated Separado(a) ; 5 = Never married Nunca me he casado ; 6 = Living with partner Vivo con una pareja ; 777 = Refused to answer Prefiero no responder

#demo_prnt_ed_v2
#What is the highest grade or level of school you have completed or the highest degree you have received? ¿Cuál es su máximo nivel de estudios completados o el máximo título que ha recibido?
#0 = Never attended/Kindergarten only Nunca asistí/Kinder solamente ; 1 = 1st grade 1.er grado ; 2 = 2nd grade 2.º grado ; 3 = 3rd grade 3.er grado ; 4 = 4th grade 4.º grado ; 5 = 5th grade 5.º grado ; 6 = 6th grade 6.º grado ; 7 = 7th grade 7.º grado ; 8 = 8th grade 8.º grado ; 9 = 9th grade 9.º grado ; 10 = 10th grade 10.º grado ; 11 = 11th grade 11.º grado ; 12 = 12th grade; 13 = High school graduate Preparatoria terminada ; 14 = GED or equivalent Diploma General de Equivalencia (GED) o equivalente ; 15 = Some college; 16 = Associate degree: Occupational; 17 = Associate degree: Academic Program Título de asociado: programa académico ; 18 = Bachelor's degree (ex. BA; 19 = Master's degree (ex. MA; 20 = Professional School degree (ex. MD; 21 = Doctoral degree (ex. PhD; 777 = Refused to answer Prefiero no responder // The following questions are about your partner. Your "partner" refers to any significant figure in your life that helps you in raising your child or has helped you for more than 2 years. This person should be involved 40% or more of the daily activities your child does. For example, your partner could be your spouse. However, your partner could also be your boyfriend/girlfriend or relative.

#demo_comb_income_v2
#What is your TOTAL COMBINED FAMILY INCOME for the past 12 months? This should include income (before taxes and deductions) from all sources, wages, rent from properties, social security, disability and/or veteran's benefits, unemployment benefits, workman's compensation, help from relative (include child payments and alimony), and so on. ¿Cuál de estas categorías es la que mejor describe su INGRESO FAMILIAR TOTAL COMBINADO de los últimos 12 meses? Este debe incluir los ingresos (antes de impuestos y deducciones) provenientes de todas las fuentes, salarios, renta de propiedades, seguro social, pagos por incapacidad o subsidios para veteranos, subsidios por desempleo, compensación por accidentes de trabajo, ayuda de familiares (incluya pensiones alimenticias para hijos y cónyuges divorciados), etc.
#1= Less than $5,000; 2=$5,000 through $11,999; 3=$12,000 through $15,999; 4=$16,000 through $24,999; 5=$25,000 through $34,999; 6=$35,000 through $49,999; 7=$50,000 through $74,999; 8= $75,000 through $99,999; 9=$100,000 through $199,999; 10=$200,000 and greater. 999 = Don't know No lo sé ; 777 = Refuse to answer No deseo responder  | If Separated/Divorced, please average the two household incomes. Si es Separado(a) / Divorciado(a), por favor promedie los dos ingresos familiares

#demo_roster_v2
#How many people are living at your address? INCLUDE everyone who is living or staying at your address for more than 2 months. ¿Cuántas personas están viviendo o quedándose en su domicilio? INCLUYA a todas las personas que lleven viviendo o quedándose en su domicilio durante más de 2 meses.

#demo_fam_exp1_v2
#demo_fam_exp2_v2
#demo_fam_exp3_v2
#demo_fam_exp4_v2
#demo_fam_exp5_v2
#demo_fam_exp6_v2
#demo_fam_exp7_v2
# In the past 12 months, has there been a time when you and your immediate family experienced any of the following: Needed food but couldn't afford to buy it or couldn't afford to go out to get it? ¿Necesitaban comida pero no les alcanzaba el dinero para comprarla o para salir a comprarla?
# 0 = No No; 1 = Yes Sí; 777 = Refuse to answer Niego contestar

# Summarise the parent-reported demographics into analysis variables.
# Response codes 777 (refused to answer) and 999 (don't know) are treated as
# missing for every variable derived here.
demograpSum <- demograp %>%
  dplyr::select(
    SUBJECTKEY, EVENTNAME, DEMO_PRNT_MARITAL_V2, DEMO_PRNT_ED_V2,
    DEMO_PRTNR_ED_V2, DEMO_COMB_INCOME_V2, DEMO_ROSTER_V2,
    starts_with("DEMO_FAM_")
  ) %>%
  mutate(marital = recode_factor(
    as.factor(DEMO_PRNT_MARITAL_V2),
    `1` = "married", `2` = "widowed", `3` = "divorced",
    `4` = "separated", `5` = "neverMarried", `6` = "livingWithPartner",
    `777` = NA_character_, `999` = NA_character_,
    # Any code not listed above becomes missing instead of being silently
    # counted as "married".
    .default = NA_character_
  )) %>%
  mutate(
    education1stPar = replace(DEMO_PRNT_ED_V2, DEMO_PRNT_ED_V2 %in% c(777, 999), NA),
    education2ndPar = replace(DEMO_PRTNR_ED_V2, DEMO_PRTNR_ED_V2 %in% c(777, 999), NA)
  ) %>%
  # Mean of the available education values; rowMeans() yields NaN when both are
  # missing, which is converted to NA so missingness is coded consistently.
  mutate(educationAvg = rowMeans(cbind(education1stPar, education2ndPar), na.rm = TRUE)) %>%
  mutate(educationAvg = replace(educationAvg, is.nan(educationAvg), NA_real_)) %>%
  mutate(combinedIncome = replace(DEMO_COMB_INCOME_V2, DEMO_COMB_INCOME_V2 %in% c(777, 999), NA)) %>%
  # trim people who live with > 20 ppl (2 people in total)
  mutate(householdSize = replace(
    DEMO_ROSTER_V2,
    which(DEMO_ROSTER_V2 %in% c(777, 999) | DEMO_ROSTER_V2 > 20),
    NA
  )) %>%
  # One pass over the economic-insecurity items; replace() also works when a
  # column was read in as all-NA logical.
  mutate(across(starts_with("DEMO_FAM_"), ~ replace(.x, .x %in% c(777, 999), NA))) %>%
  # Sum of the items; NA if any item is missing.
  mutate(econ_insecurities_sum = rowSums(across(starts_with("DEMO_FAM_")), na.rm = FALSE)) %>%
  dplyr::select(-starts_with("DEMO_"))

# Missingness and distribution check for the baseline wave.
demograpSum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 9
_______________________
Column type frequency:
character 2
factor 1
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11876 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
marital 96 0.99 FALSE 6 mar: 7990, nev: 1460, div: 1081, liv: 688

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
education1stPar 17 1.00 16.59 2.77 1 15 18 19.0 21 ▁▁▂▅▇
education2ndPar 2466 0.79 16.37 3.06 0 15 18 18.0 21 ▁▁▁▅▇
educationAvg 14 1.00 16.38 2.70 3 15 17 18.5 21 ▁▁▂▇▇
combinedIncome 1018 0.91 7.22 2.42 1 6 8 9.0 10 ▂▂▃▆▇
householdSize 281 0.98 4.71 1.55 0 4 4 5.0 19 ▂▇▁▁▁
econ_insecurities_sum 135 0.99 0.47 1.10 0 0 0 0.0 7 ▇▁▁▁▁
  # Same summary for the 2-year follow-up wave.
  # NOTE(review): the rendered output reports 0 rows for this filter, so
  # `demograp` appears to contain baseline rows only - confirm whether this
  # check is still needed.
  demograpSum %>%
    dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
    skimr::skim()
Data summary
Name Piped data
Number of rows 0
Number of columns 9
_______________________
Column type frequency:
character 2
factor 1
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 NaN NA NA 0 0 0
EVENTNAME 0 NaN NA NA 0 0 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
marital 0 NaN FALSE 0 mar: 0, wid: 0, div: 0, sep: 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
education1stPar 0 NaN NaN NA NA NA NA NA NA
education2ndPar 0 NaN NaN NA NA NA NA NA NA
educationAvg 0 NaN NaN NA NA NA NA NA NA
combinedIncome 0 NaN NaN NA NA NA NA NA NA
householdSize 0 NaN NaN NA NA NA NA NA NA
econ_insecurities_sum 0 NaN NaN NA NA NA NA NA NA

2.7.2 more Social Demographics from Residential History Derived Scores

Variables used: “RESHIST_ADDR1_ADI_WSUM”, the residential-history-derived Area Deprivation Index (scaled weighted sum, based on Kind et al., Annals of Internal Medicine, 2014 [1]); “RESHIST_ADDR1_GRNDTOT”, the grand total of the Uniform Crime Reports; and “RESHIST_ADDR1_LEADRISK”, the estimated lead risk in the census tract of the primary residential address.

# Load the residential-history derived scores table from `dataFold` as a tibble.
RHDS <-as_tibble(read.csv(paste0(dataFold,"ABCD_RHDS01_DATA_TABLE.csv"))) 

# RHDS %>% filter(EVENTNAME =="baseline_year_1_arm_1") %>% select(RESHIST_ADDR1_GRNDTOT) %>%
#   #select(-1:-8) %>%
#   summarytools::dfSummary(
#                         style = 'grid', graph.magnif = 0.75,
#                         valid.col = FALSE, tmp.img.dir = "/tmp")

# Uniform Crime Reports seem to have some high values, so a fourth-root
# transformation (x^(1/4), called "quartic" in the variable names used later)
# will be applied.
# Distribution of the raw grand total:
hist(RHDS$RESHIST_ADDR1_GRNDTOT, breaks = 100)

# Distribution after taking the fourth root:
hist((RHDS$RESHIST_ADDR1_GRNDTOT)^(1/4), breaks = 100)

# Keep the three neighbourhood-level measures under analysis-friendly names.
# The crime total is stored as its fourth root (the "quartic" variable).
ResidHistDer <- RHDS %>%
  mutate(quartic_uniform_crime_reports = (RESHIST_ADDR1_GRNDTOT)^(1/4)) %>%
  dplyr::select(
    SUBJECTKEY,
    EVENTNAME,
    area_deprivation_index = RESHIST_ADDR1_ADI_WSUM,
    lead_risk = RESHIST_ADDR1_LEADRISK,
    quartic_uniform_crime_reports
  )

# Missingness and distribution check for the baseline wave.
ResidHistDer %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 5
_______________________
Column type frequency:
character 2
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11876 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
area_deprivation_index 879 0.93 94.64 21.16 1.07 87.92 98.78 108.39 125.75 ▁▁▂▇▇
lead_risk 654 0.94 5.10 3.11 1.00 2.00 5.00 8.00 10.00 ▇▆▅▅▆
quartic_uniform_crime_reports 652 0.95 12.09 5.78 0.00 9.41 12.28 15.20 24.29 ▂▃▇▅▁
# Same summary for the 2-year follow-up wave.
ResidHistDer %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 5
_______________________
Column type frequency:
character 2
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 10414 0
EVENTNAME 0 1 24 24 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
area_deprivation_index 6973 0.33 95.83 17.85 3.39 88.32 98.72 107.64 125.75 ▁▁▁▇▇
lead_risk 6893 0.34 4.99 3.13 1.00 2.00 5.00 8.00 10.00 ▇▅▅▃▆
quartic_uniform_crime_reports 6892 0.34 12.04 5.65 0.00 9.41 12.28 15.20 24.29 ▂▃▇▅▁

2.8 Proximal Environment

From Zhang et al., Translational Psychiatry (2020), https://doi.org/10.1038/s41398-020-0761-6: The “Safety from Crime” items from the PhenX Toolkit were used to assess neighborhood safety and crime reports. Additionally, children reported their school risk and protective factors via a 12-item Inventory for School Risk and Protective Factors of the PhenX toolkit.

2.8.1 ABCD Neighborhood Safety/Crime Survey Modified from PhenX (NSC)

from parents and children

# Parent-reported neighbourhood safety: row-wise sum of every column whose name
# starts with "NEIGHBORHOOD" (NA if any of them is missing).
NeighboSafety_parent <- paste0(dataFold, "ABCD_PNSC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  mutate(
    neighbo_safety_parent_sum = rowSums(across(starts_with("NEIGHBORHOOD")), na.rm = FALSE)
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, neighbo_safety_parent_sum)

# I feel safe walking in my neighborhood, day or night. Me siento seguro(a) caminando por mi vecindario, de día o de noche.
# Violence is not a problem in my neighborhood./ La violencia no es un problema en mi vecindario.
# My neighborhood is safe from crime. Mi vecindario está a salvo de la delincuencia.
#1 = Strongly Disagree /Muy en desacuerdo; 2 = Disagree /En desacuerdo; 3 = Neutral (neither agree nor disagree)/ Neutral (ni de acuerdo ni en desacuerdo); 4 = Agree /De acuerdo; 5 = Strongly Agree/ Muy de acuerdo//The following questions are about your neighborhood. Your neighborhood is the area within about a 20-minute walk (or about a mile) from your home. For each of the statements please indicate whether you strongly agree, agree, neither agree nor disagree, disagree, or strongly disagree

# Child-reported neighbourhood safety: the single NEIGHBORHOOD_CRIME_Y item,
# kept under the name used in the rest of the analysis.
NeighboSafety_children <- paste0(dataFold, "ABCD_NSC01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  dplyr::select(
    SUBJECTKEY,
    EVENTNAME,
    neighbo_safety_child_sum = NEIGHBORHOOD_CRIME_Y
  )

#My neighborhood is safe from crime.

# Full join of the parent and child safety scores on participant and wave, so
# rows present in only one of the two tables are kept.
NeighboSafety <- plyr::join(
  NeighboSafety_parent,
  NeighboSafety_children,
  by = c('SUBJECTKEY','EVENTNAME'),
  type = 'full'
)

# Missingness and distribution check for the baseline wave.
NeighboSafety %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 4
_______________________
Column type frequency:
character 2
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11876 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
neighbo_safety_parent_sum 47 1 11.67 2.93 3 10 12 14 15 ▁▁▃▆▇
neighbo_safety_child_sum 24 1 4.03 1.09 1 3 4 5 5 ▁▁▃▆▇
# Same summary for the 2-year follow-up wave.
NeighboSafety %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 4
_______________________
Column type frequency:
character 2
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 10414 0
EVENTNAME 0 1 24 24 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
neighbo_safety_parent_sum 120 0.99 11.62 2.81 3 10 12 14 15 ▁▁▅▆▇
neighbo_safety_child_sum 32 1.00 4.09 1.00 1 4 4 5 5 ▁▁▃▆▇

2.8.2 School Risk and Protective Factors Survey

# Load the School Risk and Protective Factors table; its wave column is called
# VISIT, so it is renamed to EVENTNAME to match the other tables.
SchRisk <- paste0(dataFold, "SRPF01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  rename(EVENTNAME = VISIT)

# Three school sub-scores, each the row-wise sum of its items
# (NA if any contributing item is missing).
school_risk_sum <- SchRisk %>%
  mutate(
    sumSchool_environment = rowSums(across(c(
      SCHOOL_2_Y, SCHOOL_3_Y, SCHOOL_4_Y, SCHOOL_5_Y, SCHOOL_6_Y, SCHOOL_7_Y
    ))),
    sumSchool_involvement = rowSums(across(c(
      SCHOOL_8_Y, SCHOOL_9_Y, SCHOOL_10_Y, SCHOOL_12_Y
    ))),
    sumSchool_disengagement = rowSums(across(c(SCHOOL_15_Y, SCHOOL_17_Y)))
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, starts_with("sumSchool"))

# Missingness and distribution check for the baseline wave.
school_risk_sum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 5
_______________________
Column type frequency:
character 2
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11876 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
sumSchool_environment 27 1 19.93 2.83 6 18 20 22 24 ▁▁▂▇▇
sumSchool_involvement 26 1 13.06 2.37 4 12 13 15 16 ▁▁▃▅▇
sumSchool_disengagement 25 1 3.74 1.46 2 3 4 5 8 ▇▃▃▁▁
# Same summary for the 2-year follow-up wave.
school_risk_sum %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 5
_______________________
Column type frequency:
character 2
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 10414 0
EVENTNAME 0 1 24 24 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
sumSchool_environment 35 1 19.64 2.78 6 18 20 22 24 ▁▁▂▇▇
sumSchool_involvement 35 1 12.67 2.33 4 11 13 14 16 ▁▁▅▇▇
sumSchool_disengagement 35 1 3.99 1.35 2 3 4 5 8 ▇▆▅▂▁

2.9 Social Interaction

From Zhang et al., Translational Psychiatry (2020), https://doi.org/10.1038/s41398-020-0761-6: The child-reported parental monitoring and acceptance, as well as the child- and parent-reported prosocial tendency and family conflicts, were included to measure social interactions. Parent monitoring was assessed by a 5-item summary score of the Parental Monitoring Scale [24]. Parent acceptance was evaluated by the Acceptance Scale, a subscale of the Child Report of Behavior Inventory (CRPBI) [25]. Prosocial behavior (e.g., being nice, helping, caring) was assessed using the Prosocial Behavior Scale, a subscale from the “Strengths and Difficulties Questionnaire” (SDQ) [26]. Both parents and youth reported on the youth’s prosocial behavior (e.g., being considerate of other people’s feelings, often offering to help others). In order to assess the family conflicts, the ABCD protocol utilized a 9-item Family Conflict subscale of the Moos Family Environment Scale (FES) for the baseline protocol [27].

2.9.1 ABCD Parental Monitoring Survey

# Load the parental monitoring survey table from `dataFold` as a tibble.
ParMonSur <- paste0(dataFold, "PMQ01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# How often do your parents/guardians know where you are?
# How often do your parents know who you are with when you are not at school and away from home?
# If you are at home when your parents or guardians are not, how often do you know how to get in touch with them?
# How often do you talk to your mom/dad or guardian about your plans for the coming day, such as your plans about what will happen at school or what you are going to do with friends?
# In an average week, how many times do you and your parents/guardians, eat dinner together?
#1 = Never; 2 = Almost Never; 3 = Sometimes; 4 = Often; 5 = Always or Almost Always

# Parental monitoring score: row-wise mean of every column whose name starts
# with "PARENT_MONITOR_" (NA if any of them is missing).
ParentMonitoring <- ParMonSur %>%
  mutate(parent_monitor_mean = rowMeans(across(starts_with("PARENT_MONITOR_")))) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, parent_monitor_mean)

# Missingness and distribution check for the baseline wave.
ParentMonitoring %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 3
_______________________
Column type frequency:
character 2
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11876 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
parent_monitor_mean 23 1 4.38 0.52 1 4.2 4.4 4.8 5 ▁▁▁▃▇
# Same summary for the 2-year follow-up wave.
ParentMonitoring %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 3
_______________________
Column type frequency:
character 2
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 10414 0
EVENTNAME 0 1 24 24 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
parent_monitor_mean 32 1 4.49 0.47 1 4.2 4.6 4.8 5 ▁▁▁▂▇

2.9.2 ABCD Family Conflict

ABCD Parents Reported Parent Family Environment Scale-Family Conflict Subscale Modified from PhenX (FES), version 2 - short name: fes02 ABCD Youth Reported Parent Family Environment Scale-Family Conflict Subscale Modified from PhenX (FES), version 2 - short name: fes01

# Load the parent-reported family conflict table and keep the first row for each
# distinct combination of all columns other than FES02_ID and DATASET_ID
# (for some reason the table has a duplicate that differs only in those two ids).
FamilyConflict_Parents <- paste0(dataFold, "FES02_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  distinct(across(-c(FES02_ID, DATASET_ID)), .keep_all = TRUE)

# We fight a lot in our family. /Peleamos mucho en nuestra familia.
# Family members rarely become openly angry. /Los miembros de la familia raramente se enojan abiertamente.
# Family members sometimes get so angry they throw things./ Los miembros de la familia algunas veces se enojan tanto que avientan cosas.
# Family members hardly ever lose their tempers. /Los miembros de la familia dificilmente pierden su temperamento.
# Family members often criticize each other. /Los miembros de la familia con frecuencia se critican unos a otros.
# Family members sometimes hit each other./ Los miembros de la familia algunas veces se golpean unos a otros.
# If there is a disagreement in our family, we try hard to smooth things over and keep the peace. /Si hay un desacuerdo en nuestra familia, hacemos todo lo posible por resolverlo y conservar la paz.
# Family members often try to one-up or outdo each other./ Los miembros de la familia con frecuencia tratan de superar a los demás.
# In our family, we believe you don't ever get anywhere by raising your voice. /En nuestra familia, creemos que no se llega a nada levantando la voz.
# 1 = True /Verdadera; 0 = False/ Falsa or 0 = True /Verdadera; 1 = False/ Falsa  

# Parent-reported family conflict score: row-wise sum over the columns from
# FAM_ENVIRO1_P through FAM_ENVIRO9R_P (NA if any of them is missing).
FamilyConflict_parents_sum <- FamilyConflict_Parents %>%
  mutate(
    fam_conflict_parent = rowSums(across(FAM_ENVIRO1_P:FAM_ENVIRO9R_P), na.rm = FALSE)
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, fam_conflict_parent)

# FamilyConflict_parents_sum  %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>%
#    summarytools::dfSummary(
#                          style = 'grid', graph.magnif = 0.75, 
#                          valid.col = FALSE, tmp.img.dir = "/tmp")

# Load the youth-reported family conflict table from `dataFold` as a tibble.
FamilyConflict_Children <- paste0(dataFold, "ABCD_FES01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# We fight a lot in our family.
# Family members rarely become openly angry.
# Family members sometimes get so angry they throw things.
# Family members hardly ever lose their tempers.
# Family members often criticize each other.
# Family members sometimes hit each other.
# If there's a disagreement in our family, we try hard to smooth things over and keep the peace.
# Family members often try to one-up or outdo each other.
# In our family, we believe you don't ever get anywhere by raising your voice.

# Youth-reported family conflict score: row-wise sum over the columns from
# FES_YOUTH_Q1 through FES_YOUTH_Q9 (NA if any of them is missing).
FamilyConflict_children_sum <- FamilyConflict_Children %>%
  mutate(
    fam_conflict_children = rowSums(across(FES_YOUTH_Q1:FES_YOUTH_Q9), na.rm = FALSE)
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, fam_conflict_children)

# FamilyConflict_children_sum %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>%
#    summarytools::dfSummary(
#                          style = 'grid', graph.magnif = 0.75, 
#                          valid.col = FALSE, tmp.img.dir = "/tmp")

# Full join of the parent and youth conflict scores on participant and wave, so
# rows present in only one of the two tables are kept.
FamilyConflict_sum <- plyr::join(
  FamilyConflict_parents_sum,
  FamilyConflict_children_sum,
  by = c('SUBJECTKEY','EVENTNAME'),
  type = 'full'
)

# Missingness and distribution check for the baseline wave.
FamilyConflict_sum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 4
_______________________
Column type frequency:
character 2
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11876 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
fam_conflict_parent 12 1 2.54 1.96 0 1 2 4 9 ▇▇▅▂▁
fam_conflict_children 27 1 2.05 1.95 0 0 2 3 9 ▇▅▂▁▁
# FamilyConflict_Parents %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>% arrange(SUBJECTKEY)
# FamilyConflict_Children %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>% arrange(SUBJECTKEY)
# FamilyConflict_sum  %>%  filter(EVENTNAME =="baseline_year_1_arm_1") %>% arrange(SUBJECTKEY) 

# Same summary for the 2-year follow-up wave.
FamilyConflict_sum %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 4
_______________________
Column type frequency:
character 2
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 10414 0
EVENTNAME 0 1 24 24 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
fam_conflict_parent 67 0.99 2.43 1.97 0 1 2 4 9 ▇▇▃▂▁
fam_conflict_children 36 1.00 1.91 1.82 0 0 1 3 9 ▇▅▂▁▁

2.9.3 Prosocial Tendency

Parent Prosocial Behavior Survey Youth Prosocial Behavior Survey

# Parent Prosocial Behavior Survey, loaded from `dataFold` as a tibble.
ParPS <- paste0(dataFold, "PSB01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Youth Prosocial Behavior Survey, loaded from `dataFold` as a tibble.
YouthPS <- paste0(dataFold, "ABCD_PSB01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Full join of the parent and youth prosocial tables on participant and wave,
# then the row-wise mean of the three items per reporter
# (NA if any of the three items is missing).
prosocial_sum <- plyr::join(
  ParPS,
  YouthPS,
  by = c('SUBJECTKEY','EVENTNAME'),
  type = 'full'
) %>%
  mutate(
    prosocial_parent_mean = rowMeans(across(c(PROSOCIAL_Q1_P, PROSOCIAL_Q2_P, PROSOCIAL_Q3_P))),
    prosocial_youth_mean = rowMeans(across(c(PROSOCIAL_Q1_Y, PROSOCIAL_Q2_Y, PROSOCIAL_Q3_Y)))
  ) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, prosocial_parent_mean, prosocial_youth_mean)

# Missingness and distribution check for the baseline wave.
prosocial_sum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 4
_______________________
Column type frequency:
character 2
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11876 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
prosocial_parent_mean 62 0.99 1.75 0.40 0 1.67 2.00 2 2 ▁▁▁▁▇
prosocial_youth_mean 33 1.00 1.68 0.37 0 1.33 1.67 2 2 ▁▁▁▂▇
# Same summary for the 2-year follow-up wave.
prosocial_sum %>%
  dplyr::filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 4
_______________________
Column type frequency:
character 2
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 10414 0
EVENTNAME 0 1 24 24 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
prosocial_parent_mean 86 0.99 1.72 0.42 0 1.67 2 2 2 ▁▁▁▁▇
prosocial_youth_mean 32 1.00 1.71 0.37 0 1.33 2 2 2 ▁▁▁▂▇

2.10 Parent Sports and Activities

2.10.1 ABCD Sum Scores Parent Sports and Activities Involvement

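Lifetime involvement in each activity is computed as hours per session x days per week x 4 weeks x months per year x years, divided by 24 to express the total in days. This method produces a large number of zeros, possibly because "Don't know" responses (999) are recoded to 0.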

# Load the parent-reported sports and activities sum-score table as a tibble.
sport_act <- paste0(dataFold, "ABCD_SPACSS01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Build per-activity lifetime involvement ("<activity>_days") and the grouped
# day / days-per-week summaries from the parent-reported sports and activities
# sum scores.
# The recoding, the arithmetic, the column names and the column order are the
# same as in the earlier hand-written version; the per-activity formulas are now
# generated from one lookup table, and the deprecated `mutate_at()` / `funs()`
# calls are replaced by `across()`. Helpers live inside `local()` so they do not
# leak into the workspace.
sport_act_multiplied_sum <- local({
  # Prefix of the generated `<prefix>_days` column -> activity stem used in the
  # raw columns `SAI_SS_<STEM>_{TSPENT,PERWK,NMONTH,NYR}_P`. The order here is
  # the order in which the `_days` columns are appended.
  # didn't include listening to music or reading since they are in the different scale
  activity_stems <- c(
    dance = "DANCE", base = "BASE", basket = "BASKET", climb = "CLIMB",
    fball = "FBALL", fhock = "FHOCK", gym = "GYM", ihock = "IHOCK",
    polo = "POLO", iskate = "ISKATE", m_arts = "M_ARTS", lax = "LAX",
    rugby = "RUGBY", skate = "SKATE", sboard = "SBOARD", soc = "SOC",
    surf = "SURF", wpolo = "WPOLO", tennis = "TENNIS", run = "RUN",
    mma = "MMA", vball = "VBALL", yoga = "YOGA", music = "MUSIC",
    art = "ART", drama = "DRAMA", craft = "CRAFTS", chess = "CHESS",
    collect = "COLLECT"
  )

  # summary based on kerlic's child dev paper
  phys_ind <- c(
    "sboard", "climb", "gym", "iskate", "m_arts", "skate",
    "dance", "surf", "tennis", "run", "mma", "yoga"
  )
  phys_team <- c(
    "base", "basket", "fhock", "fball", "ihock", "polo",
    "lax", "rugby", "soc", "wpolo", "vball"
  )
  arts <- c("collect", "music", "art", "drama", "craft", "chess")
  stopifnot(all(c(phys_ind, phys_team, arts) %in% names(activity_stems)))

  # Raw column name(s) for the given activity prefix(es) and field.
  raw_col <- function(prefix, field) {
    paste0("SAI_SS_", activity_stems[prefix], "_", field, "_P")
  }

  # Element-wise sum of the named columns, added left to right exactly like a
  # chain of `+`; NA in any column propagates to the result.
  add_cols <- function(data, cols) {
    Reduce(`+`, as.list(data[cols]))
  }

  recoded <- sport_act %>%
    # change 999 to 0. don't know seems to infer that the child doesn't do that activity
    mutate(across(starts_with("SAI_SS_"), ~ replace(.x, which(.x == 999), 0))) %>%
    # Days per week. Raw codes: 0-7 = that many days; 8 = Once every 2 weeks;
    # 9 = One day every month; 10 = Less than one day per month.
    # Recode so that .125 = less than one day per month, .25 = one day every
    # month, .5 = once every 2 weeks, 1 = 1 day per week and so on.
    mutate(across(
      ends_with("_PERWK_P"),
      ~ case_when(
        .x == 10 ~ .125,
        .x == 9 ~ .25,
        .x == 8 ~ .5,
        TRUE ~ as.numeric(.x)
      )
    )) %>%
    # Time per session. Raw codes: 0 = 0; 1 = less than 30 minutes; 2 = 30;
    # 3 = 45; 4 = 60; 5 = 90; 6 = 120; 7 = 150; 8 = 180; 9 = greater than 3 hours.
    # Convert to hours; assumes less than 30 minutes to be .25 hour (15 mins)
    # and > 3 hrs to be 4 hours.
    mutate(across(
      ends_with("_TSPENT_P"),
      ~ case_when(
        .x == 1 ~ .25,
        .x == 2 ~ .5,
        .x == 3 ~ .75,
        .x == 4 ~ 1,
        .x == 5 ~ 1.5,
        .x == 6 ~ 2,
        .x == 7 ~ 2.5,
        .x == 8 ~ 3,
        .x == 9 ~ 4,
        TRUE ~ as.numeric(.x)
      )
    ))

  # hours x days x 4 weeks x months x years / 24 to get days
  for (prefix in names(activity_stems)) {
    recoded[[paste0(prefix, "_days")]] <-
      recoded[[raw_col(prefix, "TSPENT")]] *
      recoded[[raw_col(prefix, "PERWK")]] * 4 *
      recoded[[raw_col(prefix, "NMONTH")]] *
      recoded[[raw_col(prefix, "NYR")]] / 24
  }

  mutate(
    recoded,
    phys_ind_days_sum = add_cols(recoded, paste0(phys_ind, "_days")),
    phys_team_days_sum = add_cols(recoded, paste0(phys_team, "_days")),
    art_days_sum = add_cols(recoded, paste0(arts, "_days")),
    sport_act_all_days_sum = phys_ind_days_sum + phys_team_days_sum + art_days_sum,
    phys_ind_daypweek_sum = add_cols(recoded, raw_col(phys_ind, "PERWK")),
    phys_team_daypweek_sum = add_cols(recoded, raw_col(phys_team, "PERWK")),
    art_daypweek_sum = add_cols(recoded, raw_col(arts, "PERWK")),
    sport_act_all_daypweek_sum = phys_ind_daypweek_sum + phys_team_daypweek_sum + art_daypweek_sum
  )
})

# Missingness and distribution check for the baseline wave.
sport_act_multiplied_sum %>%
  dplyr::filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 167
_______________________
Column type frequency:
character 5
numeric 162
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11876 0
SRC_SUBJECT_ID 0 1 16 16 0 11876 0
INTERVIEW_DATE 0 1 9 9 0 756 0
SEX 0 1 1 1 0 2 0
EVENTNAME 0 1 21 21 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ABCD_SPACSS01_ID 0 1.00 34214.50 3428.45 28277 31245.75 34214.50 37183.25 40152.00 ▇▇▇▇▇
DATASET_ID 0 1.00 47120.00 0.00 47120 47120.00 47120.00 47120.00 47120.00 ▁▁▇▁▁
INTERVIEW_AGE 0 1.00 118.98 7.50 107 112.00 119.00 126.00 133.00 ▇▆▆▆▆
SAI_SS_DANCE_NYR_P 24 1.00 0.64 1.44 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_DANCE_NMONTH_P 26 1.00 1.93 3.68 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_DANCE_PERWK_P 20 1.00 0.42 0.89 0 0.00 0.00 0.25 7.00 ▇▁▁▁▁
SAI_SS_DANCE_TSPENT_P 30 1.00 0.26 0.51 0 0.00 0.00 0.00 4.00 ▇▂▁▁▁
SAI_SS_BASE_NYR_P 17 1.00 0.74 1.50 0 0.00 0.00 1.00 10.00 ▇▁▁▁▁
SAI_SS_BASE_NMONTH_P 23 1.00 1.14 2.16 0 0.00 0.00 2.00 12.00 ▇▂▁▁▁
SAI_SS_BASE_PERWK_P 23 1.00 0.71 1.26 0 0.00 0.00 2.00 7.00 ▇▁▂▁▁
SAI_SS_BASE_TSPENT_P 27 1.00 0.37 0.66 0 0.00 0.00 1.00 4.00 ▇▂▁▁▁
SAI_SS_BASKET_NYR_P 21 1.00 0.59 1.23 0 0.00 0.00 1.00 10.00 ▇▁▁▁▁
SAI_SS_BASKET_NMONTH_P 25 1.00 1.07 2.14 0 0.00 0.00 1.00 12.00 ▇▂▁▁▁
SAI_SS_BASKET_PERWK_P 26 1.00 0.62 1.17 0 0.00 0.00 1.00 7.00 ▇▂▁▁▁
SAI_SS_BASKET_TSPENT_P 50 1.00 0.29 0.54 0 0.00 0.00 0.50 4.00 ▇▂▁▁▁
SAI_SS_CLIMB_NYR_P 15 1.00 0.08 0.53 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_CLIMB_NMONTH_P 14 1.00 0.16 1.09 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_CLIMB_PERWK_P 19 1.00 0.06 0.40 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_CLIMB_TSPENT_P 19 1.00 0.04 0.24 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_FHOCK_NYR_P 10 1.00 0.01 0.18 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_FHOCK_NMONTH_P 11 1.00 0.02 0.34 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_FHOCK_PERWK_P 10 1.00 0.01 0.18 0 0.00 0.00 0.00 5.00 ▇▁▁▁▁
SAI_SS_FHOCK_TSPENT_P 10 1.00 0.01 0.10 0 0.00 0.00 0.00 3.00 ▇▁▁▁▁
SAI_SS_FBALL_NYR_P 18 1.00 0.25 0.86 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_FBALL_NMONTH_P 21 1.00 0.44 1.42 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_FBALL_PERWK_P 20 1.00 0.35 1.10 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_FBALL_TSPENT_P 24 1.00 0.17 0.51 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_GYM_NYR_P 15 1.00 0.49 1.16 0 0.00 0.00 0.00 9.00 ▇▁▁▁▁
SAI_SS_GYM_NMONTH_P 19 1.00 1.71 3.59 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_GYM_PERWK_P 21 1.00 0.33 0.77 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_GYM_TSPENT_P 31 1.00 0.26 0.56 0 0.00 0.00 0.00 4.00 ▇▂▁▁▁
SAI_SS_IHOCK_NYR_P 10 1.00 0.08 0.56 0 0.00 0.00 0.00 9.00 ▇▁▁▁▁
SAI_SS_IHOCK_NMONTH_P 13 1.00 0.14 1.00 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_IHOCK_PERWK_P 10 1.00 0.07 0.48 0 0.00 0.00 0.00 6.00 ▇▁▁▁▁
SAI_SS_IHOCK_TSPENT_P 11 1.00 0.03 0.20 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_POLO_NYR_P 11 1.00 0.08 0.52 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_POLO_NMONTH_P 14 1.00 0.22 1.38 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_POLO_PERWK_P 14 1.00 0.05 0.36 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_POLO_TSPENT_P 17 1.00 0.04 0.25 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_ISKATE_NYR_P 11 1.00 0.13 0.64 0 0.00 0.00 0.00 9.00 ▇▁▁▁▁
SAI_SS_ISKATE_NMONTH_P 16 1.00 0.27 1.38 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_ISKATE_PERWK_P 18 1.00 0.08 0.40 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_ISKATE_TSPENT_P 19 1.00 0.06 0.26 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_M_ARTS_NYR_P 16 1.00 0.36 0.97 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_M_ARTS_NMONTH_P 23 1.00 1.49 3.63 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_M_ARTS_PERWK_P 19 1.00 0.36 0.91 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_M_ARTS_TSPENT_P 25 1.00 0.17 0.40 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_LAX_NYR_P 10 1.00 0.06 0.41 0 0.00 0.00 0.00 8.00 ▇▁▁▁▁
SAI_SS_LAX_NMONTH_P 12 1.00 0.11 0.76 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_LAX_PERWK_P 15 1.00 0.07 0.42 0 0.00 0.00 0.00 5.00 ▇▁▁▁▁
SAI_SS_LAX_TSPENT_P 13 1.00 0.04 0.23 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_RUGBY_NYR_P 10 1.00 0.01 0.14 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_RUGBY_NMONTH_P 10 1.00 0.01 0.28 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_RUGBY_PERWK_P 10 1.00 0.01 0.15 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_RUGBY_TSPENT_P 10 1.00 0.00 0.09 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_SKATE_NYR_P 13 1.00 0.09 0.56 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_SKATE_NMONTH_P 17 1.00 0.22 1.33 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_SKATE_PERWK_P 14 1.00 0.10 0.62 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_SKATE_TSPENT_P 18 1.00 0.03 0.20 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_SBOARD_NYR_P 10 1.00 0.33 1.25 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_SBOARD_NMONTH_P 20 1.00 0.27 0.99 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_SBOARD_PERWK_P 12 1.00 0.10 0.42 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_SBOARD_TSPENT_P 16 1.00 0.25 0.90 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_SOC_NYR_P 31 1.00 1.21 1.85 0 0.00 0.00 2.00 10.00 ▇▁▁▁▁
SAI_SS_SOC_NMONTH_P 44 1.00 2.04 3.05 0 0.00 0.00 4.00 12.00 ▇▂▁▁▁
SAI_SS_SOC_PERWK_P 38 1.00 0.95 1.27 0 0.00 0.00 2.00 7.00 ▇▂▂▁▁
SAI_SS_SOC_TSPENT_P 58 1.00 0.47 0.62 0 0.00 0.00 1.00 4.00 ▇▅▁▁▁
SAI_SS_SURF_NYR_P 10 1.00 0.01 0.21 0 0.00 0.00 0.00 9.00 ▇▁▁▁▁
SAI_SS_SURF_NMONTH_P 12 1.00 0.02 0.31 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_SURF_PERWK_P 10 1.00 0.01 0.18 0 0.00 0.00 0.00 5.00 ▇▁▁▁▁
SAI_SS_SURF_TSPENT_P 11 1.00 0.01 0.15 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_WPOLO_NYR_P 22 1.00 1.16 2.14 0 0.00 0.00 2.00 10.00 ▇▁▁▁▁
SAI_SS_WPOLO_NMONTH_P 42 1.00 1.76 3.24 0 0.00 0.00 3.00 12.00 ▇▁▁▁▁
SAI_SS_WPOLO_PERWK_P 35 1.00 0.78 1.43 0 0.00 0.00 1.00 7.00 ▇▁▁▁▁
SAI_SS_WPOLO_TSPENT_P 47 1.00 0.31 0.56 0 0.00 0.00 0.50 4.00 ▇▂▁▁▁
SAI_SS_TENNIS_NYR_P 15 1.00 0.14 0.63 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_TENNIS_NMONTH_P 17 1.00 0.31 1.46 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_TENNIS_PERWK_P 19 1.00 0.12 0.54 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_TENNIS_TSPENT_P 21 1.00 0.07 0.29 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_RUN_NYR_P 18 1.00 0.14 0.62 0 0.00 0.00 0.00 9.00 ▇▁▁▁▁
SAI_SS_RUN_NMONTH_P 20 1.00 0.29 1.27 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_RUN_PERWK_P 24 1.00 0.16 0.68 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_RUN_TSPENT_P 20 1.00 0.08 0.31 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_MMA_NYR_P 16 1.00 0.06 0.45 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_MMA_NMONTH_P 20 1.00 0.17 1.16 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_MMA_PERWK_P 16 1.00 0.08 0.49 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_MMA_TSPENT_P 20 1.00 0.04 0.24 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_VBALL_NYR_P 13 1.00 0.04 0.27 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_VBALL_NMONTH_P 15 1.00 0.09 0.66 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_VBALL_PERWK_P 17 1.00 0.05 0.36 0 0.00 0.00 0.00 6.00 ▇▁▁▁▁
SAI_SS_VBALL_TSPENT_P 14 1.00 0.03 0.20 0 0.00 0.00 0.00 3.00 ▇▁▁▁▁
SAI_SS_YOGA_NYR_P 13 1.00 0.04 0.37 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_YOGA_NMONTH_P 13 1.00 0.12 0.96 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_YOGA_PERWK_P 14 1.00 0.03 0.30 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_YOGA_TSPENT_P 16 1.00 0.02 0.12 0 0.00 0.00 0.00 3.00 ▇▁▁▁▁
SAI_SS_MUSIC_NYR_P 25 1.00 0.90 1.48 0 0.00 0.00 1.00 10.00 ▇▁▁▁▁
SAI_SS_MUSIC_NMONTH_P 39 1.00 3.38 4.65 0 0.00 0.00 8.00 12.00 ▇▁▁▁▂
SAI_SS_MUSIC_PERWK_P 32 1.00 0.84 1.44 0 0.00 0.00 1.00 7.00 ▇▁▁▁▁
SAI_SS_MUSIC_TSPENT_P 42 1.00 0.29 0.42 0 0.00 0.00 0.50 4.00 ▇▁▁▁▁
SAI_SS_ART_NYR_P 27 1.00 0.79 1.94 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_ART_NMONTH_P 36 1.00 1.60 3.67 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_ART_PERWK_P 41 1.00 0.54 1.39 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_ART_TSPENT_P 38 1.00 0.18 0.45 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_DRAMA_NYR_P 14 1.00 0.23 0.83 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_DRAMA_NMONTH_P 29 1.00 0.53 1.83 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_DRAMA_PERWK_P 27 1.00 0.23 0.83 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_DRAMA_TSPENT_P 24 1.00 0.16 0.54 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_CRAFTS_NYR_P 21 1.00 0.28 1.12 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_CRAFTS_NMONTH_P 28 1.00 0.62 2.40 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_CRAFTS_PERWK_P 27 1.00 0.21 0.85 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_CRAFTS_TSPENT_P 27 1.00 0.08 0.30 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_CHESS_NYR_P 22 1.00 0.27 0.96 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_CHESS_NMONTH_P 28 1.00 0.70 2.36 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_CHESS_PERWK_P 34 1.00 0.17 0.66 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_CHESS_TSPENT_P 33 1.00 0.10 0.32 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_COLLECT_NYR_P 17 1.00 0.21 0.93 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
SAI_SS_COLLECT_NMONTH_P 28 1.00 0.53 2.32 0 0.00 0.00 0.00 12.00 ▇▁▁▁▁
SAI_SS_COLLECT_PERWK_P 24 1.00 0.17 0.86 0 0.00 0.00 0.00 7.00 ▇▁▁▁▁
SAI_SS_COLLECT_TSPENT_P 23 1.00 0.04 0.20 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
SAI_SS_LMUSIC_YEARS_P 1220 0.90 3.84 3.22 0 1.00 3.00 6.00 10.00 ▇▃▂▂▃
SAI_SS_LMUSIC_HOURS_P 1697 0.86 4.64 7.58 0 1.00 3.00 6.00 168.00 ▇▁▁▁▁
SAI_SS_READ_YEARS_P 591 0.95 2.72 2.38 0 0.00 3.00 4.00 10.00 ▇▅▃▁▁
SAI_SS_READ_HOURS_P 831 0.93 4.62 9.22 0 0.00 3.00 6.00 168.00 ▇▁▁▁▁
SPORTS_ACTIVITY_SS_LMUSIC_P 2069 0.83 169.79 369.57 1 3.00 6.00 9.00 999.00 ▇▁▁▁▂
SPORTS_ACTIVITY_SS_READ_P 3309 0.72 91.02 278.59 1 4.00 6.00 8.00 999.00 ▇▁▁▁▁
dance_days 54 1.00 2.28 12.64 0 0.00 0.00 0.00 560.00 ▇▁▁▁▁
base_days 52 1.00 2.58 9.11 0 0.00 0.00 0.67 266.67 ▇▁▁▁▁
basket_days 74 0.99 1.42 5.58 0 0.00 0.00 0.00 147.00 ▇▁▁▁▁
climb_days 28 1.00 0.14 1.93 0 0.00 0.00 0.00 128.00 ▇▁▁▁▁
fball_days 35 1.00 1.02 6.11 0 0.00 0.00 0.00 245.00 ▇▁▁▁▁
fhock_days 11 1.00 0.02 0.67 0 0.00 0.00 0.00 55.00 ▇▁▁▁▁
gym_days 54 1.00 1.99 12.01 0 0.00 0.00 0.00 392.00 ▇▁▁▁▁
ihock_days 14 1.00 0.35 3.31 0 0.00 0.00 0.00 75.00 ▇▁▁▁▁
polo_days 23 1.00 0.17 2.54 0 0.00 0.00 0.00 135.00 ▇▁▁▁▁
iskate_days 28 1.00 0.21 2.14 0 0.00 0.00 0.00 108.00 ▇▁▁▁▁
m_arts_days 48 1.00 1.35 5.69 0 0.00 0.00 0.00 175.00 ▇▁▁▁▁
lax_days 18 1.00 0.16 1.68 0 0.00 0.00 0.00 60.00 ▇▁▁▁▁
rugby_days 10 1.00 0.02 0.53 0 0.00 0.00 0.00 26.67 ▇▁▁▁▁
skate_days 23 1.00 0.29 3.97 0 0.00 0.00 0.00 261.33 ▇▁▁▁▁
sboard_days 28 1.00 0.85 4.93 0 0.00 0.00 0.00 128.00 ▇▁▁▁▁
soc_days 107 0.99 3.84 10.13 0 0.00 0.00 3.00 196.00 ▇▁▁▁▁
surf_days 13 1.00 0.03 0.87 0 0.00 0.00 0.00 48.00 ▇▁▁▁▁
wpolo_days 90 0.99 2.97 10.38 0 0.00 0.00 1.25 256.00 ▇▁▁▁▁
tennis_days 31 1.00 0.31 4.56 0 0.00 0.00 0.00 420.00 ▇▁▁▁▁
run_days 36 1.00 0.30 2.29 0 0.00 0.00 0.00 96.00 ▇▁▁▁▁
mma_days 25 1.00 0.28 3.06 0 0.00 0.00 0.00 175.00 ▇▁▁▁▁
vball_days 20 1.00 0.07 0.73 0 0.00 0.00 0.00 44.00 ▇▁▁▁▁
yoga_days 17 1.00 0.07 1.05 0 0.00 0.00 0.00 48.00 ▇▁▁▁▁
music_days 82 0.99 2.34 7.09 0 0.00 0.00 1.67 224.00 ▇▁▁▁▁
art_days 70 0.99 3.68 17.62 0 0.00 0.00 0.00 504.00 ▇▁▁▁▁
drama_days 46 1.00 0.69 5.10 0 0.00 0.00 0.00 280.00 ▇▁▁▁▁
craft_days 43 1.00 1.11 8.54 0 0.00 0.00 0.00 280.00 ▇▁▁▁▁
chess_days 59 1.00 0.58 4.60 0 0.00 0.00 0.00 200.00 ▇▁▁▁▁
collect_days 39 1.00 0.62 7.08 0 0.00 0.00 0.00 448.00 ▇▁▁▁▁
phys_ind_days_sum 259 0.98 8.06 21.58 0 0.00 1.00 8.00 560.00 ▇▁▁▁▁
phys_team_days_sum 322 0.97 12.60 22.06 0 0.00 4.00 15.50 367.67 ▇▁▁▁▁
art_days_sum 263 0.98 8.85 26.38 0 0.00 0.33 6.00 600.00 ▇▁▁▁▁
sport_act_all_days_sum 735 0.94 29.34 43.86 0 3.33 15.33 38.00 826.17 ▇▁▁▁▁
phys_ind_daypweek_sum 93 0.99 1.84 2.42 0 0.00 1.00 3.00 44.75 ▇▁▁▁▁
phys_team_daypweek_sum 112 0.99 3.67 3.75 0 0.00 3.00 6.00 36.00 ▇▁▁▁▁
art_daypweek_sum 118 0.99 2.15 3.23 0 0.00 1.00 3.00 36.00 ▇▁▁▁▁
sport_act_all_daypweek_sum 273 0.98 7.63 6.40 0 3.00 6.12 11.00 103.25 ▇▁▁▁▁
# Summarise the multiplied-sum activity scores at the 2-year follow-up.
sport_act_multiplied_sum %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 0
Number of columns 167
_______________________
Column type frequency:
character 5
numeric 162
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 NaN NA NA 0 0 0
SRC_SUBJECT_ID 0 NaN NA NA 0 0 0
INTERVIEW_DATE 0 NaN NA NA 0 0 0
SEX 0 NaN NA NA 0 0 0
EVENTNAME 0 NaN NA NA 0 0 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ABCD_SPACSS01_ID 0 NaN NaN NA NA NA NA NA NA
DATASET_ID 0 NaN NaN NA NA NA NA NA NA
INTERVIEW_AGE 0 NaN NaN NA NA NA NA NA NA
SAI_SS_DANCE_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_DANCE_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_DANCE_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_DANCE_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_BASE_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_BASE_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_BASE_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_BASE_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_BASKET_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_BASKET_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_BASKET_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_BASKET_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CLIMB_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CLIMB_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CLIMB_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CLIMB_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_FHOCK_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_FHOCK_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_FHOCK_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_FHOCK_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_FBALL_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_FBALL_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_FBALL_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_FBALL_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_GYM_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_GYM_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_GYM_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_GYM_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_IHOCK_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_IHOCK_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_IHOCK_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_IHOCK_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_POLO_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_POLO_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_POLO_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_POLO_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_ISKATE_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_ISKATE_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_ISKATE_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_ISKATE_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_M_ARTS_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_M_ARTS_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_M_ARTS_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_M_ARTS_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_LAX_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_LAX_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_LAX_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_LAX_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_RUGBY_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_RUGBY_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_RUGBY_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_RUGBY_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SKATE_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SKATE_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SKATE_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SKATE_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SBOARD_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SBOARD_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SBOARD_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SBOARD_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SOC_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SOC_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SOC_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SOC_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SURF_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SURF_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SURF_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_SURF_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_WPOLO_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_WPOLO_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_WPOLO_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_WPOLO_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_TENNIS_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_TENNIS_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_TENNIS_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_TENNIS_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_RUN_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_RUN_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_RUN_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_RUN_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_MMA_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_MMA_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_MMA_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_MMA_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_VBALL_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_VBALL_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_VBALL_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_VBALL_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_YOGA_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_YOGA_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_YOGA_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_YOGA_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_MUSIC_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_MUSIC_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_MUSIC_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_MUSIC_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_ART_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_ART_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_ART_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_ART_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_DRAMA_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_DRAMA_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_DRAMA_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_DRAMA_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CRAFTS_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CRAFTS_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CRAFTS_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CRAFTS_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CHESS_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CHESS_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CHESS_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_CHESS_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_COLLECT_NYR_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_COLLECT_NMONTH_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_COLLECT_PERWK_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_COLLECT_TSPENT_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_LMUSIC_YEARS_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_LMUSIC_HOURS_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_READ_YEARS_P 0 NaN NaN NA NA NA NA NA NA
SAI_SS_READ_HOURS_P 0 NaN NaN NA NA NA NA NA NA
SPORTS_ACTIVITY_SS_LMUSIC_P 0 NaN NaN NA NA NA NA NA NA
SPORTS_ACTIVITY_SS_READ_P 0 NaN NaN NA NA NA NA NA NA
dance_days 0 NaN NaN NA NA NA NA NA NA
base_days 0 NaN NaN NA NA NA NA NA NA
basket_days 0 NaN NaN NA NA NA NA NA NA
climb_days 0 NaN NaN NA NA NA NA NA NA
fball_days 0 NaN NaN NA NA NA NA NA NA
fhock_days 0 NaN NaN NA NA NA NA NA NA
gym_days 0 NaN NaN NA NA NA NA NA NA
ihock_days 0 NaN NaN NA NA NA NA NA NA
polo_days 0 NaN NaN NA NA NA NA NA NA
iskate_days 0 NaN NaN NA NA NA NA NA NA
m_arts_days 0 NaN NaN NA NA NA NA NA NA
lax_days 0 NaN NaN NA NA NA NA NA NA
rugby_days 0 NaN NaN NA NA NA NA NA NA
skate_days 0 NaN NaN NA NA NA NA NA NA
sboard_days 0 NaN NaN NA NA NA NA NA NA
soc_days 0 NaN NaN NA NA NA NA NA NA
surf_days 0 NaN NaN NA NA NA NA NA NA
wpolo_days 0 NaN NaN NA NA NA NA NA NA
tennis_days 0 NaN NaN NA NA NA NA NA NA
run_days 0 NaN NaN NA NA NA NA NA NA
mma_days 0 NaN NaN NA NA NA NA NA NA
vball_days 0 NaN NaN NA NA NA NA NA NA
yoga_days 0 NaN NaN NA NA NA NA NA NA
music_days 0 NaN NaN NA NA NA NA NA NA
art_days 0 NaN NaN NA NA NA NA NA NA
drama_days 0 NaN NaN NA NA NA NA NA NA
craft_days 0 NaN NaN NA NA NA NA NA NA
chess_days 0 NaN NaN NA NA NA NA NA NA
collect_days 0 NaN NaN NA NA NA NA NA NA
phys_ind_days_sum 0 NaN NaN NA NA NA NA NA NA
phys_team_days_sum 0 NaN NaN NA NA NA NA NA NA
art_days_sum 0 NaN NaN NA NA NA NA NA NA
sport_act_all_days_sum 0 NaN NaN NA NA NA NA NA NA
phys_ind_daypweek_sum 0 NaN NaN NA NA NA NA NA NA
phys_team_daypweek_sum 0 NaN NaN NA NA NA NA NA NA
art_daypweek_sum 0 NaN NaN NA NA NA NA NA NA
sport_act_all_daypweek_sum 0 NaN NaN NA NA NA NA NA NA

2.10.2 ABCD Sum Scores Parent Sports and Activities Involvement

This section uses the method of Kerlic et al., who focus only on days per week. The data are therefore converted differently here.

# Load the parent-report Sports and Activities Involvement sum-score table.
sport_act <- as_tibble(read.csv(paste0(dataFold, "ABCD_SPACSS01_DATA_TABLE.csv")))

# Build per-participant "days per week" sums for individual physical
# activities, team physical activities and arts/hobbies, plus their total.
# Only SUBJECTKEY, EVENTNAME and the four *_daypweek_sum columns are kept.
# The sums use `+`, so a missing value in any contributing item makes that
# sum NA.
sport_act_kerlic_sum <- sport_act %>%
  # change 999 to 0. don't know seems to infer that the child doesn't do that activiy
  mutate(across(starts_with("SAI_SS_"), ~ replace(., which(. == 999), 0))) %>%
  # Raw coding of the *_PERWK_P items:
  # 0 = 0; 1 = 1; 2 = 2; 3 = 3; 4 = 4; 5 = 5; 6 = 6; 7 = 7;
  # 8 = Once every 2 weeks; 9 = One day every month;
  # 10 = Less than one day per month; 999 = Don't know.
  # When an activity was not endorsed, values for the follow-up questions are
  # missing. Here, missing values for the "... how many...?" follow-up
  # questions have been replaced with "0".
  #
  # Recode to an ordinal frequency rank (not fractional days per week):
  # 0 stays 0 (nothing), 10 -> 1 (less than one day per month),
  # 9 -> 2 (one day every month), 8 -> 3 (once every 2 weeks),
  # and 1-7 days per week -> 4-10. Any other value is kept as-is (as numeric).
  # `across()` with a lambda replaces `mutate_at()` + the deprecated `funs()`.
  mutate(across(
    ends_with("_PERWK_P"),
    ~ case_when(
      . == 10 ~ 1,
      . == 9 ~ 2,
      . == 8 ~ 3,
      . == 1 ~ 4,
      . == 2 ~ 5,
      . == 3 ~ 6,
      . == 4 ~ 7,
      . == 5 ~ 8,
      . == 6 ~ 9,
      . == 7 ~ 10,
      TRUE ~ as.numeric(.)
    )
  )) %>%
  # Individual physical activities.
  mutate(phys_ind_daypweek_sum = SAI_SS_SBOARD_PERWK_P + SAI_SS_CLIMB_PERWK_P + SAI_SS_GYM_PERWK_P + SAI_SS_ISKATE_PERWK_P + SAI_SS_M_ARTS_PERWK_P + SAI_SS_SKATE_PERWK_P + SAI_SS_DANCE_PERWK_P + SAI_SS_SURF_PERWK_P + SAI_SS_TENNIS_PERWK_P + SAI_SS_RUN_PERWK_P + SAI_SS_MMA_PERWK_P + SAI_SS_YOGA_PERWK_P) %>%
  # Team physical activities.
  mutate(phys_team_daypweek_sum = SAI_SS_BASE_PERWK_P + SAI_SS_BASKET_PERWK_P + SAI_SS_FHOCK_PERWK_P + SAI_SS_FBALL_PERWK_P + SAI_SS_IHOCK_PERWK_P + SAI_SS_POLO_PERWK_P + SAI_SS_LAX_PERWK_P + SAI_SS_RUGBY_PERWK_P + SAI_SS_SOC_PERWK_P + SAI_SS_WPOLO_PERWK_P + SAI_SS_VBALL_PERWK_P) %>%
  # Arts and hobbies.
  mutate(art_daypweek_sum = SAI_SS_COLLECT_PERWK_P + SAI_SS_MUSIC_PERWK_P + SAI_SS_ART_PERWK_P + SAI_SS_DRAMA_PERWK_P + SAI_SS_CRAFTS_PERWK_P + SAI_SS_CHESS_PERWK_P) %>%
  # Grand total across the three categories.
  mutate(sport_act_all_daypweek_sum = phys_ind_daypweek_sum + phys_team_daypweek_sum + art_daypweek_sum) %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, ends_with("_daypweek_sum"))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
## 
## # Simple named list: list(mean = mean, median = median)
## 
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
## 
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Summarise the days-per-week sums at baseline (ID and event columns dropped).
sport_act_kerlic_sum %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(
    -SUBJECTKEY,
    -EVENTNAME,
    ends_with("_daypweek_sum")
  ) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 4
_______________________
Column type frequency:
numeric 4
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
phys_ind_daypweek_sum 93 0.99 4.89 5.64 0 0 4 8 70 ▇▁▁▁▁
phys_team_daypweek_sum 112 0.99 8.12 7.60 0 0 6 13 54 ▇▃▁▁▁
art_daypweek_sum 118 0.99 4.96 6.25 0 0 4 8 54 ▇▁▁▁▁
sport_act_all_daypweek_sum 273 0.98 17.89 13.72 0 8 16 26 170 ▇▁▁▁▁
# Summarise the days-per-week sums at the 2-year follow-up
# (ID and event columns dropped).
sport_act_kerlic_sum %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(
    -SUBJECTKEY,
    -EVENTNAME,
    ends_with("_daypweek_sum")
  ) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 0
Number of columns 4
_______________________
Column type frequency:
numeric 4
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
phys_ind_daypweek_sum 0 NaN NaN NA NA NA NA NA NA
phys_team_daypweek_sum 0 NaN NaN NA NA NA NA NA NA
art_daypweek_sum 0 NaN NaN NA NA NA NA NA NA
sport_act_all_daypweek_sum 0 NaN NaN NA NA NA NA NA NA

2.10.3 physical activity

ABCD Youth Risk Behavior Survey Exercise Physical Activity

# Read the Youth Risk Behavior Survey table and keep only the participant key,
# the event name and the physical-activity item (renamed to physc_act_days).
phyc_act <- paste0(dataFold, "ABCD_YRB01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble() %>%
  dplyr::select(SUBJECTKEY, EVENTNAME, physc_act_days = PHYSICAL_ACTIVITY1_Y)

#During the past 7 days, on how many days were you physically active for a total of at least 60 minutes per day? (Add up all the time you spent in any kind of physical activity that increased your heart rate and made you breathe hard some of the time)

# Summarise the item at baseline.
phyc_act %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
physc_act_days 28 1 3.49 2.32 0 2 3 5 7 ▇▅▇▅▇
# Summarise the physical-activity item at the 2-year follow-up.
phyc_act %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
physc_act_days 28 1 3.79 2.15 0 2 4 5 7 ▅▃▇▅▆

2.10.4 BMI and Waist

ABCD Youth Anthropometrics Modified From PhenX. The values are questionable, even after deleting outliers, so we ended up not using them.

# Read the youth anthropometrics table.
anthro <- paste0(dataFold, "ABCD_ANT01_DATA_TABLE.csv") %>%
  read.csv() %>%
  as_tibble()

# Tabulate the cast flag to see how many measurements it affects.
anthro %>%
  count(ANTHROWEIGHTCAST)
## # A tibble: 3 × 2
##   ANTHROWEIGHTCAST     n
##              <int> <int>
## 1                0 31903
## 2                1    96
## 3               NA  7767
# BMI won't be accurate for participants weighed with a cast, so bmi is only
# computed when ANTHROWEIGHTCAST is 0 or missing; rows flagged with a cast get
# bmi = NA. The factor 703 is the usual pounds/inches BMI conversion
# (assumes the *CALC columns are in those units -- TODO confirm).
bmi_waist <- anthro %>%
  # filter(ANTHROHEIGHTCALC > 30) %>% # remove those who are unusally short. Potentially error in data entering
  # filter(ANTHROWEIGHTCALC < 500) %>%
  # filter(!rstatix::is_outlier(ANTHROHEIGHTCALC) & !rstatix::is_outlier(ANTHROWEIGHTCALC)) %>%
  mutate(
    bmi = ifelse(
      is.na(ANTHROWEIGHTCAST) | ANTHROWEIGHTCAST == 0,
      (ANTHROWEIGHTCALC / (ANTHROHEIGHTCALC^2)) * 703,
      NA
    )
  ) %>%
  # Keep the keys, the derived bmi, waist (renamed from ANTHRO_WAIST_CM) and
  # the raw weight/height inputs.
  dplyr::select(
    SUBJECTKEY,
    EVENTNAME,
    bmi,
    waist = ANTHRO_WAIST_CM,
    ANTHROWEIGHTCALC,
    ANTHROHEIGHTCALC
  )
  
# anthro %>% 
#   mutate(bmi = ifelse(ANTHROWEIGHTCAST == 0 | is.na(ANTHROWEIGHTCAST),
#                       (ANTHROWEIGHTCALC/(ANTHROHEIGHTCALC^2))*703,NA)) %>%
#   rename(waist = ANTHRO_WAIST_CM) %>%
#   arrange(desc(bmi)) %>% glimpse()

# bmi_waist %>% arrange(desc(bmi)) %>% glimpse()
# bmi_waist %>% arrange(bmi) %>% glimpse()
# 
# anthro %>% rstatix::identify_outliers(ANTHROHEIGHTCALC) %>%  arrange(ANTHROHEIGHTCALC) %>% View()
# anthro %>% rstatix::identify_outliers(ANTHROHEIGHTCALC) %>%  arrange(desc(ANTHROHEIGHTCALC)) %>% View()

# boxplot(anthro$ANTHROHEIGHTCALC)$out
# boxplot(anthro$ANTHROWEIGHTCALC)$out
# 
# boxplot(bmi_waist$ANTHROWEIGHTCALC)$out

# Summarise BMI, waist and the raw height/weight columns at baseline.
bmi_waist %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11876
Number of columns 4
_______________________
Column type frequency:
logical 1
numeric 3
________________________
Group variables None

Variable type: logical

skim_variable n_missing complete_rate mean count
ANTHROWEIGHTCALC 11876 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bmi 11876 0 NaN NA NA NA NA NA NA
waist 17 1 26.48 4.30 0 23.5 25.5 28.7 73 ▁▇▂▁▁
ANTHROHEIGHTCALC 9 1 55.24 3.33 0 53.0 55.1 57.2 82 ▁▁▁▇▁
# Summarise BMI, waist and the raw height/weight columns at the 2-year
# follow-up.
bmi_waist %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-c(SUBJECTKEY, EVENTNAME)) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10414
Number of columns 4
_______________________
Column type frequency:
logical 1
numeric 3
________________________
Group variables None

Variable type: logical

skim_variable n_missing complete_rate mean count
ANTHROWEIGHTCALC 10414 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bmi 10414 0.00 NaN NA NA NA NA NA NA
waist 2892 0.72 28.77 4.97 0.00 25.1 27.75 31.0 75.0 ▁▇▃▁▁
ANTHROHEIGHTCALC 1560 0.85 60.19 3.60 5.75 58.0 60.00 62.5 98.3 ▁▁▇▅▁

2.11 Join data

Set the feature names

# Feature names grouped by domain. Each vector lists column names as character
# strings; `features` concatenates all groups in the order defined below.

# Sleep items.
Child_Sleep <- c(
  "sleep_hours",
  "sleep_disturb",
  "sleep_initiate_maintain",
  "sleep_breath",
  "sleep_arousal",
  "sleep_transition",
  "sleep_somnolence",
  "sleep_hyperhydrosis"
)

# Activity sums and the youth-reported active-days item.
Physical_Activity <- c(
  "phys_ind_daypweek_sum",
  "phys_team_daypweek_sum",
  "art_daypweek_sum",
  "physc_act_days"
)

# Screen-use items.
Child_Screen_Use <- c(
  "matureGames_Screen",
  "matureMovies_Screen",
  "wkdySum_Screen",
  "wkndSum_Screen"
)

# Substance-use items around pregnancy.
Parent_Drug_Use <- c(
  "tobacco_before_preg",
  "tobacco_after_preg",
  "alcohol_before_preg",
  "alcohol_after_preg",
  "marijuana_before_preg",
  "marijuana_after_preg"
)

# Developmental adversity items (names kept exactly as spelled in the data).
Child_Developmental_Adversity <- c(
  "deveplopment_prematurity",
  "deveplopment_birth_complications",
  "deveplopment_pregnancy_complications"
)

# Socio-demographic, neighbourhood and school items.
Child_Socio_Demographics <- c(
  "bilingual_use",
  "marital",
  "educationAvg",
  "combinedIncome",
  "householdSize",
  "econ_insecurities_sum",
  "area_deprivation_index",
  "lead_risk",
  "quartic_uniform_crime_reports",
  "neighbo_safety_parent_sum",
  "neighbo_safety_child_sum",
  "sumSchool_environment",
  "sumSchool_involvement",
  "sumSchool_disengagement"
)

# Parental monitoring, family conflict and prosocial items.
Social_Interaction <- c(
  "parent_monitor_mean",
  "fam_conflict_parent",
  "fam_conflict_children",
  "prosocial_parent_mean",
  "prosocial_youth_mean"
)

# All feature names, in domain order.
features <- c(
  Child_Sleep,
  Physical_Activity,
  Child_Screen_Use,
  Parent_Drug_Use,
  Child_Developmental_Adversity,
  Child_Socio_Demographics,
  Social_Interaction
)
# Full-join every per-domain summary table on participant and event, then drop
# rows whose visionProb flag equals 1 (rows with a missing flag are kept) and
# remove the flag column itself.
all_sum_vars <- list(
  Siteinfo, ACSselected, sleepSum, youthScreenSum,
  momSubstanceUse, adversitySum, bilingualSum,
  demograpSum, ResidHistDer, NeighboSafety,
  school_risk_sum, ParentMonitoring, FamilyConflict_sum, prosocial_sum,
  sport_act_kerlic_sum, phyc_act, vision_idx
) %>%
  plyr::join_all(by = c("SUBJECTKEY", "EVENTNAME"), type = "full") %>%
  filter(is.na(visionProb) | visionProb != 1) %>% # remove subjects with eyesight problems
  dplyr::select(-visionProb)


# Summarise the joined table at baseline, leaving out its first two columns.
all_sum_vars %>%
  filter(EVENTNAME == "baseline_year_1_arm_1") %>%
  dplyr::select(-1:-2) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 11845
Number of columns 66
_______________________
Column type frequency:
character 6
factor 11
numeric 49
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 11845 0
SRC_SUBJECT_ID 0 1 16 16 0 11845 0
INTERVIEW_DATE 0 1 9 9 0 756 0
SEX 0 1 1 1 0 2 0
EVENTNAME 0 1 21 21 0 1 0
SITE_ID_L 0 1 6 6 0 22 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
REL_FAMILY_ID 0 1.00 FALSE 9832 373: 5, 749: 4, 11: 3, 400: 3
tobacco_before_preg 277 0.98 FALSE 2 0: 9960, 1: 1608
tobacco_after_preg 263 0.98 FALSE 2 0: 10962, 1: 620
alcohol_before_preg 681 0.94 FALSE 2 0: 8289, 1: 2875
alcohol_after_preg 291 0.98 FALSE 2 0: 11239, 1: 315
marijuana_before_preg 337 0.97 FALSE 2 0: 10822, 1: 686
marijuana_after_preg 275 0.98 FALSE 2 0: 11326, 1: 244
deveplopment_prematurity 145 0.99 FALSE 2 0: 9497, 1: 2203
bilingual_status 81 0.99 FALSE 2 0: 7343, 1: 4421
bilingual_degree 81 0.99 FALSE 3 0: 7343, 2: 2743, 1: 1678
marital 96 0.99 FALSE 6 mar: 7973, nev: 1455, div: 1077, liv: 684

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
INTERVIEW_AGE 0 1.00 118.98 7.49 107.00 112.00 119.00 126.00 133.00 ▇▆▆▆▆
SCHED_DELAY 13 1.00 7.00 0.10 1.00 7.00 7.00 7.00 7.00 ▁▁▁▁▇
SCHED_HYBRID 11845 0.00 NaN NA NA NA NA NA NA
ACS_RAKED_PROPENSITY_SCORE 0 1.00 691.25 350.96 161.36 448.94 619.31 821.72 1778.92 ▅▇▂▂▁
sleep_hours 5 1.00 1.72 0.81 1.00 1.00 2.00 2.00 5.00 ▇▆▂▁▁
sleep_disturb 5 1.00 1.93 0.98 1.00 1.00 2.00 2.00 5.00 ▇▇▂▁▁
sleep_initiate_maintain 5 1.00 11.76 3.75 7.00 9.00 11.00 13.00 35.00 ▇▃▁▁▁
sleep_breath 5 1.00 3.77 1.25 3.00 3.00 3.00 4.00 15.00 ▇▁▁▁▁
sleep_arousal 5 1.00 3.44 0.92 3.00 3.00 3.00 4.00 15.00 ▇▁▁▁▁
sleep_transition 32 1.00 8.18 2.63 6.00 6.00 7.00 9.00 30.00 ▇▁▁▁▁
sleep_somnolence 6 1.00 6.95 2.44 5.00 5.00 6.00 8.00 25.00 ▇▁▁▁▁
sleep_hyperhydrosis 5 1.00 2.44 1.18 2.00 2.00 2.00 2.00 10.00 ▇▁▁▁▁
sleep_total 33 1.00 36.53 8.24 26.00 31.00 35.00 40.00 126.00 ▇▁▁▁▁
matureGames_Screen 20 1.00 0.57 0.87 0.00 0.00 0.00 1.00 3.00 ▇▃▁▁▁
matureMovies_Screen 21 1.00 0.38 0.64 0.00 0.00 0.00 1.00 3.00 ▇▃▁▁▁
wkdySum_Screen 37 1.00 3.46 3.10 0.00 1.25 2.50 4.75 24.00 ▇▂▁▁▁
wkndSum_Screen 42 1.00 4.62 3.63 0.00 2.00 3.50 6.25 24.00 ▇▃▁▁▁
deveplopment_birth_complications 759 0.94 0.37 0.74 0.00 0.00 0.00 1.00 8.00 ▇▁▁▁▁
deveplopment_pregnancy_complications 741 0.94 0.61 1.02 0.00 0.00 0.00 1.00 12.00 ▇▁▁▁▁
bilingual_use 81 0.99 1.02 1.70 0.00 0.00 0.00 1.00 9.00 ▇▂▁▁▁
education1stPar 17 1.00 16.60 2.77 1.00 15.00 18.00 19.00 21.00 ▁▁▂▅▇
education2ndPar 2458 0.79 16.38 3.06 0.00 15.00 18.00 18.00 21.00 ▁▁▁▅▇
educationAvg 14 1.00 16.38 2.70 3.00 15.00 17.00 18.50 21.00 ▁▁▂▇▇
combinedIncome 1015 0.91 7.23 2.42 1.00 6.00 8.00 9.00 10.00 ▂▂▃▆▇
householdSize 279 0.98 4.70 1.55 0.00 4.00 4.00 5.00 19.00 ▂▇▁▁▁
econ_insecurities_sum 134 0.99 0.47 1.10 0.00 0.00 0.00 0.00 7.00 ▇▁▁▁▁
area_deprivation_index 876 0.93 94.63 21.18 1.07 87.85 98.78 108.39 125.75 ▁▁▂▇▇
lead_risk 651 0.95 5.10 3.11 1.00 2.00 5.00 8.00 10.00 ▇▆▅▅▆
quartic_uniform_crime_reports 649 0.95 12.09 5.78 0.00 9.41 12.28 15.20 24.29 ▂▃▇▅▁
neighbo_safety_parent_sum 47 1.00 11.67 2.93 3.00 10.00 12.00 14.00 15.00 ▁▁▃▆▇
neighbo_safety_child_sum 24 1.00 4.03 1.10 1.00 3.00 4.00 5.00 5.00 ▁▁▃▆▇
sumSchool_environment 27 1.00 19.93 2.83 6.00 18.00 20.00 22.00 24.00 ▁▁▂▇▇
sumSchool_involvement 26 1.00 13.06 2.37 4.00 12.00 13.00 15.00 16.00 ▁▁▃▅▇
sumSchool_disengagement 25 1.00 3.74 1.46 2.00 3.00 4.00 5.00 8.00 ▇▃▃▁▁
parent_monitor_mean 23 1.00 4.38 0.52 1.00 4.20 4.40 4.80 5.00 ▁▁▁▃▇
fam_conflict_parent 12 1.00 2.54 1.96 0.00 1.00 2.00 4.00 9.00 ▇▇▅▂▁
fam_conflict_children 27 1.00 2.04 1.95 0.00 0.00 2.00 3.00 9.00 ▇▅▂▁▁
prosocial_parent_mean 62 0.99 1.75 0.40 0.00 1.67 2.00 2.00 2.00 ▁▁▁▁▇
prosocial_youth_mean 33 1.00 1.68 0.37 0.00 1.33 1.67 2.00 2.00 ▁▁▁▂▇
phys_ind_daypweek_sum 93 0.99 4.89 5.64 0.00 0.00 4.00 8.00 70.00 ▇▁▁▁▁
phys_team_daypweek_sum 112 0.99 8.13 7.61 0.00 0.00 6.00 13.00 54.00 ▇▃▁▁▁
art_daypweek_sum 118 0.99 4.96 6.24 0.00 0.00 4.00 8.00 54.00 ▇▁▁▁▁
sport_act_all_daypweek_sum 273 0.98 17.90 13.72 0.00 8.00 16.00 26.00 170.00 ▇▁▁▁▁
physc_act_days 28 1.00 3.50 2.32 0.00 2.00 3.00 5.00 7.00 ▇▅▇▅▇
ABCD_SVS01_ID 0 1.00 50523.16 6430.86 39373.00 44951.00 50525.00 56092.00 61660.00 ▇▇▇▇▇
SNELLEN_AID_Y 21 1.00 0.24 0.43 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▂
SNELLEN_AIDPRES_Y 8987 0.24 0.62 0.49 0.00 0.00 1.00 1.00 1.00 ▅▁▁▁▇
SNELLEN_VA_Y 28 1.00 6.84 1.44 2.00 6.00 7.00 8.00 11.00 ▁▃▇▆▁
VIS_FLG 11083 0.06 1.00 0.00 1.00 1.00 1.00 1.00 1.00 ▁▁▇▁▁
# Summarise every column except the first two for the 2-year follow-up rows.
all_sum_vars %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1") %>%
  dplyr::select(-1:-2) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 10387
Number of columns 66
_______________________
Column type frequency:
character 6
factor 11
numeric 49
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECTKEY 0 1 12 16 0 10387 0
SRC_SUBJECT_ID 0 1 16 16 0 10387 0
INTERVIEW_DATE 0 1 9 9 0 788 0
SEX 0 1 1 1 0 2 0
EVENTNAME 0 1 24 24 0 1 0
SITE_ID_L 0 1 6 6 0 21 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
REL_FAMILY_ID 10387 0.00 FALSE 0 0: 0, 1: 0, 3: 0, 4: 0
tobacco_before_preg 10387 0.00 FALSE 0 0: 0, 1: 0
tobacco_after_preg 10387 0.00 FALSE 0 0: 0, 1: 0
alcohol_before_preg 10387 0.00 FALSE 0 0: 0, 1: 0
alcohol_after_preg 10387 0.00 FALSE 0 0: 0, 1: 0
marijuana_before_preg 10387 0.00 FALSE 0 0: 0, 1: 0
marijuana_after_preg 10387 0.00 FALSE 0 0: 0, 1: 0
deveplopment_prematurity 10387 0.00 FALSE 0 0: 0, 1: 0
bilingual_status 71 0.99 FALSE 2 0: 6232, 1: 4084
bilingual_degree 71 0.99 FALSE 3 0: 6232, 2: 2721, 1: 1363
marital 10387 0.00 FALSE 0 mar: 0, wid: 0, div: 0, sep: 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
INTERVIEW_AGE 0 1.00 144.04 7.95 127.00 137.00 144.00 151.00 168.00 ▅▇▇▅▁
SCHED_DELAY 0 1.00 7.55 0.89 7.00 7.00 7.00 9.00 9.00 ▇▁▁▁▃
SCHED_HYBRID 7674 0.26 0.49 0.50 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▇
ACS_RAKED_PROPENSITY_SCORE 10387 0.00 NaN NA NA NA NA NA NA
sleep_hours 74 0.99 1.99 0.87 1.00 1.00 2.00 2.00 5.00 ▆▇▃▁▁
sleep_disturb 74 0.99 2.05 1.05 1.00 1.00 2.00 3.00 5.00 ▇▇▃▁▁
sleep_initiate_maintain 74 0.99 12.04 3.77 7.00 9.00 11.00 14.00 34.00 ▇▃▁▁▁
sleep_breath 74 0.99 3.69 1.15 3.00 3.00 3.00 4.00 15.00 ▇▁▁▁▁
sleep_arousal 74 0.99 3.31 0.72 3.00 3.00 3.00 3.00 11.00 ▇▁▁▁▁
sleep_transition 75 0.99 7.80 2.43 6.00 6.00 7.00 9.00 28.00 ▇▁▁▁▁
sleep_somnolence 74 0.99 7.14 2.59 5.00 5.00 6.00 8.00 25.00 ▇▁▁▁▁
sleep_hyperhydrosis 74 0.99 2.34 0.99 2.00 2.00 2.00 2.00 10.00 ▇▁▁▁▁
sleep_total 75 0.99 36.32 8.04 26.00 31.00 34.00 40.00 105.00 ▇▂▁▁▁
matureGames_Screen 29 1.00 0.62 0.88 0.00 0.00 0.00 1.00 3.00 ▇▃▁▂▁
matureMovies_Screen 37 1.00 0.49 0.65 0.00 0.00 0.00 1.00 3.00 ▇▅▁▁▁
wkdySum_Screen 10387 0.00 NaN NA NA NA NA NA NA
wkndSum_Screen 10387 0.00 NaN NA NA NA NA NA NA
deveplopment_birth_complications 10387 0.00 NaN NA NA NA NA NA NA
deveplopment_pregnancy_complications 10387 0.00 NaN NA NA NA NA NA NA
bilingual_use 71 0.99 1.01 1.64 0.00 0.00 0.00 1.00 9.00 ▇▂▁▁▁
education1stPar 10387 0.00 NaN NA NA NA NA NA NA
education2ndPar 10387 0.00 NaN NA NA NA NA NA NA
educationAvg 10387 0.00 NaN NA NA NA NA NA NA
combinedIncome 10387 0.00 NaN NA NA NA NA NA NA
householdSize 10387 0.00 NaN NA NA NA NA NA NA
econ_insecurities_sum 10387 0.00 NaN NA NA NA NA NA NA
area_deprivation_index 6957 0.33 95.81 17.86 3.39 88.32 98.72 107.64 125.75 ▁▁▁▇▇
lead_risk 6878 0.34 4.98 3.12 1.00 2.00 5.00 8.00 10.00 ▇▅▅▃▆
quartic_uniform_crime_reports 6877 0.34 12.03 5.65 0.00 9.41 12.28 15.20 24.29 ▂▃▇▅▁
neighbo_safety_parent_sum 119 0.99 11.62 2.80 3.00 10.00 12.00 14.00 15.00 ▁▁▅▆▇
neighbo_safety_child_sum 32 1.00 4.09 1.00 1.00 4.00 4.00 5.00 5.00 ▁▁▃▆▇
sumSchool_environment 35 1.00 19.64 2.78 6.00 18.00 20.00 22.00 24.00 ▁▁▂▇▇
sumSchool_involvement 35 1.00 12.66 2.33 4.00 11.00 13.00 14.00 16.00 ▁▁▅▇▇
sumSchool_disengagement 35 1.00 3.99 1.35 2.00 3.00 4.00 5.00 8.00 ▇▆▅▂▁
parent_monitor_mean 32 1.00 4.49 0.47 1.00 4.20 4.60 4.80 5.00 ▁▁▁▂▇
fam_conflict_parent 67 0.99 2.43 1.97 0.00 1.00 2.00 4.00 9.00 ▇▇▃▂▁
fam_conflict_children 36 1.00 1.91 1.82 0.00 0.00 1.00 3.00 9.00 ▇▅▂▁▁
prosocial_parent_mean 86 0.99 1.72 0.42 0.00 1.67 2.00 2.00 2.00 ▁▁▁▁▇
prosocial_youth_mean 32 1.00 1.71 0.37 0.00 1.33 2.00 2.00 2.00 ▁▁▁▂▇
phys_ind_daypweek_sum 10387 0.00 NaN NA NA NA NA NA NA
phys_team_daypweek_sum 10387 0.00 NaN NA NA NA NA NA NA
art_daypweek_sum 10387 0.00 NaN NA NA NA NA NA NA
sport_act_all_daypweek_sum 10387 0.00 NaN NA NA NA NA NA NA
physc_act_days 28 1.00 3.79 2.15 0.00 2.00 4.00 5.00 7.00 ▅▃▇▅▆
ABCD_SVS01_ID 0 1.00 50511.20 6441.84 39372.00 44933.00 50506.00 56094.00 61661.00 ▇▇▇▇▇
SNELLEN_AID_Y 1626 0.84 0.28 0.45 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▃
SNELLEN_AIDPRES_Y 7945 0.24 0.67 0.47 0.00 0.00 1.00 1.00 1.00 ▃▁▁▁▇
SNELLEN_VA_Y 1625 0.84 7.11 1.53 2.00 6.00 7.00 8.00 11.00 ▁▃▇▇▁
VIS_FLG 9840 0.05 1.00 0.00 1.00 1.00 1.00 1.00 1.00 ▁▁▇▁▁

2.12 preprocess site

make sure that there are no members from the same family at different sites

# Keep only the baseline-visit rows.
all_sum_vars_baseline <- all_sum_vars %>%
  filter(EVENTNAME == "baseline_year_1_arm_1")

# Number of baseline rows per site.
all_sum_vars_baseline %>%
  count(SITE_ID_L)
##    SITE_ID_L    n
## 1     site01  405
## 2     site02  558
## 3     site03  629
## 4     site04  745
## 5     site05  377
## 6     site06  580
## 7     site07  339
## 8     site08  350
## 9     site09  433
## 10    site10  736
## 11    site11  448
## 12    site12  600
## 13    site13  726
## 14    site14  606
## 15    site15  457
## 16    site16 1010
## 17    site17  577
## 18    site18  384
## 19    site19  549
## 20    site20  701
## 21    site21  599
## 22    site22   36
# Check whether members of the same family appear at different sites.
# The wide table has one row per family and one column per site (cell = number
# of members), so an off-diagonal entry of its cross-product is non-zero only
# when at least one family has members at both of those sites.
# NOTE(review): the original note said "There are 6 of them" -- confirm against
# the matrix printed below.
all_sum_vars_baseline %>%
  drop_na(SITE_ID_L) %>%
  filter(SITE_ID_L != "site22") %>%
  count(REL_FAMILY_ID, SITE_ID_L) %>%
  # pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps
  # the site columns in sorted order, as spread() did
  pivot_wider(
    names_from = SITE_ID_L,
    values_from = n,
    values_fill = 0L,
    names_sort = TRUE
  ) %>%
  dplyr::select(-REL_FAMILY_ID) %>%
  as.matrix() %>%
  crossprod()
##        site01 site02 site03 site04 site05 site06 site07 site08 site09 site10
## site01    495      0      0      0      0      0      0      0      0      0
## site02      0   1048      0      0      0      0      0      0      0      0
## site03      0      0    751      0      0      0      0      0      0      0
## site04      0      0      0    955      0      0      0      0      0      0
## site05      0      0      0      0    473      0      0      0      0      0
## site06      0      0      0      0      0    688      0      0      0      0
## site07      0      0      0      0      0      0    425      0      0      0
## site08      0      0      0      0      0      0      0    434      0      0
## site09      0      0      0      0      0      0      0      0    479      0
## site10      0      0      0      0      0      0      0      0      0    910
## site11      0      0      0      0      0      0      0      0      0      0
## site12      0      0      0      0      0      0      0      0      0      0
## site13      0      0      0      0      0      0      0      0      0      0
## site14      0      0      0      0      0      0      0      0      0      0
## site15      0      0      0      0      0      0      0      0      0      0
## site16      0      0      0      0      0      0      0      0      0      0
## site17      0      0      0      0      0      0      0      0      0      0
## site18      0      0      0      0      0      0      0      0      0      0
## site19      0      0      0      0      0      0      0      0      0      0
## site20      0      0      0      0      0      0      0      0      0      0
## site21      0      0      0      0      0      0      0      0      0      0
##        site11 site12 site13 site14 site15 site16 site17 site18 site19 site20
## site01      0      0      0      0      0      0      0      0      0      0
## site02      0      0      0      0      0      0      0      0      0      0
## site03      0      0      0      0      0      0      0      0      0      0
## site04      0      0      0      0      0      0      0      0      0      0
## site05      0      0      0      0      0      0      0      0      0      0
## site06      0      0      0      0      0      0      0      0      0      0
## site07      0      0      0      0      0      0      0      0      0      0
## site08      0      0      0      0      0      0      0      0      0      0
## site09      0      0      0      0      0      0      0      0      0      0
## site10      0      0      0      0      0      0      0      0      0      0
## site11    562      0      0      0      0      0      0      0      0      0
## site12      0    746      0      0      0      0      0      0      0      0
## site13      0      0    888      0      0      0      0      0      0      0
## site14      0      0      0   1106      0      0      0      0      0      0
## site15      0      0      0      0    549      0      0      0      0      0
## site16      0      0      0      0      0   1394      0      0      0      0
## site17      0      0      0      0      0      0    697      0      0      0
## site18      0      0      0      0      0      0      0    448      0      0
## site19      0      0      0      0      0      0      0      0   1015      0
## site20      0      0      0      0      0      0      0      0      0   1187
## site21      0      0      0      0      0      0      0      0      0      0
##        site21
## site01      0
## site02      0
## site03      0
## site04      0
## site05      0
## site06      0
## site07      0
## site08      0
## site09      0
## site10      0
## site11      0
## site12      0
## site13      0
## site14      0
## site15      0
## site16      0
## site17      0
## site18      0
## site19      0
## site20      0
## site21    723
# Flag families whose baseline members are spread over more than one site.
# `dup` is 1 for every row of such a family and 0 otherwise; the flagged
# families are removed further below.
all_sum_vars_baseline_no_dup <- all_sum_vars_baseline %>%
  drop_na(SITE_ID_L) %>%
  filter(SITE_ID_L != "site22") %>%
  group_by(REL_FAMILY_ID) %>%
  # a family is a duplicate when it has more than one distinct site; this
  # replaces the deprecated nest(SITE_ID_L, .key = ...) round trip
  mutate(dup = as.numeric(n_distinct(SITE_ID_L) > 1)) %>%
  ungroup() %>%
  # the previous nest()/unnest() version left SITE_ID_L right before `dup`
  relocate(SITE_ID_L, .before = dup)
## Warning: Supplying `...` without names was deprecated in tidyr 1.0.0.
## ℹ Please specify a name for each selection.
## ℹ Did you want `SITE_ID_L = SITE_ID_L`?
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Families flagged above as having members at more than one site.
family_exclude <- unique(all_sum_vars_baseline_no_dup$REL_FAMILY_ID[which(all_sum_vars_baseline_no_dup$dup==1)])

# The skim summary of the 2-year follow-up shows REL_FAMILY_ID missing on every
# follow-up row, so a filter on REL_FAMILY_ID alone cannot drop the follow-up
# rows of an excluded family. Collect the subjects of those families from the
# baseline rows and exclude them by SUBJECTKEY as well.
subject_exclude <- all_sum_vars_baseline$SUBJECTKEY[
  all_sum_vars_baseline$REL_FAMILY_ID %in% family_exclude
]

all_sum_vars_no_dup <- all_sum_vars %>%
  filter(
    !REL_FAMILY_ID %in% family_exclude,
    !SUBJECTKEY %in% subject_exclude
  ) %>%
  drop_na(SITE_ID_L) %>%
  filter(SITE_ID_L != "site22")


### test whether the data set has all the features
setdiff(features, colnames(all_sum_vars_no_dup))
## character(0)

3 Modeling for Socio-Demographic and Psychological Factors

Samples: REL_FAMILY_ID (9856 levels) and SITE_ID_L (the 22nd site needs to be removed because it has too few subjects). Also make sure the rows are filtered on the correct EVENTNAME.

Target: Factor analysis of psychopathology: pfactor

46 Features: soc-demo-lifestyle-dev

Features by categories:

Child Sleep (8): sleep_hours sleep_disturb sleep_initiate_maintain sleep_breath sleep_arousal sleep_transition sleep_somnolence sleep_hyperhydrosis

Physical Activity (4): phys_ind_daypweek_sum phys_team_daypweek_sum art_daypweek_sum physc_act_days

Child Screen Use (4): matureGames_Screen matureMovies_Screen wkdySum_Screen wkndSum_Screen

Parent Drug Use (6): tobacco_before_preg tobacco_after_preg alcohol_before_preg alcohol_after_preg marijuana_before_preg marijuana_after_preg

Child Developmental Adversity (3): deveplopment_prematurity deveplopment_birth_complications deveplopment_pregnancy_complications

Child Socio-Demographics (14): bilingual_use marital educationAvg combinedIncome householdSize econ_insecurities_sum area_deprivation_index lead_risk quartic_uniform_crime_reports neighbo_safety_parent_sum neighbo_safety_child_sum sumSchool_environment sumSchool_involvement sumSchool_disengagement

Social Interaction (5): parent_monitor_mean fam_conflict_parent fam_conflict_children prosocial_parent_mean prosocial_youth_mean

Set up vectors of names based on the different categories of features.

3.0.1 process data for modelling

# Keep the subject-identifying columns plus the modelling features.
all_features_no_dup <- dplyr::select(
  all_sum_vars_no_dup,
  all_of(subj_info),
  all_of(features)
)
## change the character variables and factors into numeric values

# Names of the features that are stored as factors.
factor_features <- c(
  "tobacco_before_preg",
  "tobacco_after_preg",
  "alcohol_before_preg",
  "alcohol_after_preg",
  "marijuana_before_preg",
  "marijuana_after_preg",
  "deveplopment_prematurity",
  "marital"
)

# Recode the marital labels to the codes 1-6; any other label falls back to 1.
all_features_no_dup_num <- all_features_no_dup %>%
  mutate(
    marital = recode_factor(
      as.factor(marital),
      married = 1,
      widowed = 2,
      divorced = 3,
      separated = 4,
      neverMarried = 5,
      livingWithPartner = 6,
      .default = 1
    )
  )
#%>%
#  mutate_if(is.factor,as.numeric)


### check the NAs in both baseline and followup data sets
all_features_no_dup_num_baseline <- all_features_no_dup_num %>%
  filter(EVENTNAME == "baseline_year_1_arm_1")

naniar::vis_miss(all_features_no_dup_num_baseline)

all_features_no_dup_num_followup <- all_features_no_dup_num %>%
  filter(EVENTNAME == "2_year_follow_up_y_arm_1")

naniar::vis_miss(all_features_no_dup_num_followup)

3.1 Replace the variables with baseline data in the followup

The following variables are only found in the baseline. Those variables are:

“phys_ind_daypweek_sum”

“phys_team_daypweek_sum”

“art_daypweek_sum”

“wkdySum_Screen”

“wkndSum_Screen”

“tobacco_before_preg”

“tobacco_after_preg”

“alcohol_before_preg”

“alcohol_after_preg”

“marijuana_before_preg”

“marijuana_after_preg”

“deveplopment_prematurity”

“deveplopment_birth_complications”

“deveplopment_pregnancy_complications”

“marital”

“educationAvg”

“combinedIncome”

“householdSize”

“econ_insecurities_sum”

“area_deprivation_index”

“lead_risk”

“quartic_uniform_crime_reports”

For these variables, the baseline values are copied unchanged into the followup data and used in the followup analysis.

The following variables appear in both baseline and followup data:

“sleep_hours”

“sleep_disturb”

“sleep_initiate_maintain”

“sleep_breath”

“sleep_arousal”

“sleep_transition”

“sleep_somnolence”

“sleep_hyperhydrosis”

“physc_act_days”

“matureGames_Screen”

“matureMovies_Screen”

“bilingual_use”

“neighbo_safety_parent_sum”

“neighbo_safety_child_sum”

“sumSchool_environment”

“sumSchool_involvement”

“sumSchool_disengagement”

“parent_monitor_mean”

“fam_conflict_parent”

“fam_conflict_children”

“prosocial_parent_mean”

“prosocial_youth_mean”

# Features that are only recorded at baseline and are carried into the followup.
features_fix_na <- c(
  "phys_ind_daypweek_sum",
  "phys_team_daypweek_sum",
  "art_daypweek_sum",
  "wkdySum_Screen",
  "wkndSum_Screen",
  "tobacco_before_preg",
  "tobacco_after_preg",
  "alcohol_before_preg",
  "alcohol_after_preg",
  "marijuana_before_preg",
  "marijuana_after_preg",
  "deveplopment_prematurity",
  "deveplopment_birth_complications",
  "deveplopment_pregnancy_complications",
  "marital",
  "educationAvg",
  "combinedIncome",
  "householdSize",
  "econ_insecurities_sum",
  "area_deprivation_index",
  "lead_risk",
  "quartic_uniform_crime_reports"
)

# Baseline values of those features, keyed by subject and site.
all_features_no_dup_na_fix_baseline <- dplyr::select(
  all_features_no_dup_num_baseline,
  all_of(c("SUBJECTKEY", "SITE_ID_L")),
  all_of(features_fix_na)
)

# Followup rows without those columns ...
all_features_no_dup_na_fix_followup <- dplyr::select(
  all_features_no_dup_num_followup,
  -all_of(features_fix_na)
)

# ... which are then filled in from the baseline rows.
# NOTE(review): SITE_ID_L is part of the join key, so a subject whose site
# differs between the two events gets NA for these columns -- confirm intended.
all_features_no_dup_na_fixed_followup <- left_join(
  all_features_no_dup_na_fix_followup,
  all_features_no_dup_na_fix_baseline,
  by = c("SUBJECTKEY", "SITE_ID_L")
)

## plot the information of missingness after all the NAs are fixed
naniar::vis_miss(all_features_no_dup_na_fixed_followup)

# Stack baseline and the completed followup rows into one table.
all_features_no_dup_na_fixed <- bind_rows(
  all_features_no_dup_num_baseline,
  all_features_no_dup_na_fixed_followup
)

Making data splits by site.

# One row per site, sorted by site id.
site_col <- arrange(
  distinct(all_features_no_dup_na_fixed, SITE_ID_L),
  SITE_ID_L
)

# The same site ids as a list (to map over) and as a character vector (names).
site_list <- as.list(site_col$SITE_ID_L)

site_char <- as.character(unlist(site_col$SITE_ID_L))

# Build one split object per site with split_func().
split_list <- purrr::map(
  site_list,
  function(site) split_func(site, data_input = all_features_no_dup_na_fixed)
)


names(split_list) <- site_char

Join features and response across sites

# Join the features of one site's split with the gfactor response tables.
#
# site_input: name of the site; used to index split_list and the four
#   *_gfactor lists (all taken from the calling environment).
# Returns a named list of eight tables: baseline/followup x train/test joined
#   on SUBJECTKEY with rows lacking gfactor dropped, plus the same four tables
#   without the subj_info columns (suffix "_select").
feature_resp_join <- function(site_input){
  site_split <- split_list[[site_input]]
  train_rows <- training(site_split)
  test_rows <- testing(site_split)

  # Restrict the feature rows to one event, attach the response and drop
  # every row that has no gfactor value.
  join_event <- function(feature_rows, event_label, response_rows) {
    feature_rows %>%
      filter(EVENTNAME == !!event_label) %>%
      full_join(response_rows, by = "SUBJECTKEY") %>%
      drop_na("gfactor")
  }

  # Remove the subject-identifying columns.
  strip_ids <- function(joined_rows) {
    dplyr::select(joined_rows, -all_of(subj_info))
  }

  baseline_event <- "baseline_year_1_arm_1"
  followup_event <- "2_year_follow_up_y_arm_1"

  joined <- list(
    baseline_train = join_event(train_rows, baseline_event,
                                baseline_train_gfactor[[site_input]]),
    baseline_test = join_event(test_rows, baseline_event,
                               baseline_test_gfactor[[site_input]]),
    followup_train = join_event(train_rows, followup_event,
                                followup_train_gfactor[[site_input]]),
    followup_test = join_event(test_rows, followup_event,
                               followup_test_gfactor[[site_input]])
  )

  stripped <- lapply(joined, strip_ids)
  names(stripped) <- paste0(names(joined), "_select")

  c(joined, stripped)
}


# Run the feature/response join for every site and name the results by site.
gfactor_ses_split_list <- purrr::map(site_char, feature_resp_join)

names(gfactor_ses_split_list) <- site_char


# Pull each of the eight tables out into its own per-site list.
gfactor_ses_baseline_train <- purrr::map(gfactor_ses_split_list, "baseline_train")
gfactor_ses_baseline_test  <- purrr::map(gfactor_ses_split_list, "baseline_test")
gfactor_ses_followup_train <- purrr::map(gfactor_ses_split_list, "followup_train")
gfactor_ses_followup_test  <- purrr::map(gfactor_ses_split_list, "followup_test")


gfactor_ses_baseline_train_select <- purrr::map(gfactor_ses_split_list, "baseline_train_select")
gfactor_ses_baseline_test_select  <- purrr::map(gfactor_ses_split_list, "baseline_test_select")
gfactor_ses_followup_train_select <- purrr::map(gfactor_ses_split_list, "followup_train_select")
gfactor_ses_followup_test_select  <- purrr::map(gfactor_ses_split_list, "followup_test_select")

3.1.1 scale the data set with dummy variables

This function can be run either by replacing the select and map functions in the R functions file, or by running it without loading the PLS packages.

The process of data preparation is as follows

  1. Use a recipe to change the dummy variables into continuous variables. (In the meantime, impute the NAs with the mode and then with K-nearest neighbours.)
  2. Scale the baseline train and test data separately. Then scale the followup train and test with the mean and standard deviation from baseline train and test respectively.
# Process the four tables of every site together; the result is one list
# element per site.
processed_features_gfactor_ses_list <- purrr::pmap(
  list(
    gfactor_ses_baseline_train,
    gfactor_ses_baseline_test,
    gfactor_ses_followup_train,
    gfactor_ses_followup_test
  ),
  function(bl_train, bl_test, fu_train, fu_test) {
    data_processing_cross_sites_seperate_dummy(
      baseline_train = bl_train,
      baseline_test = bl_test,
      followup_train = fu_train,
      followup_test = fu_test
    )
  }
)

save and load the processed datasets

3.2 loading the processed data output

3.2.1 manually fixing variables with all zeros in the test datasets

Some of the factor features are all zero in the test sets. Because they have a standard deviation of 0, the recipe removes them, so those variables are missing from the processed test sets. When the model is then fitted on the test sets, the missing variables cause the model fitting function to fail.

This is a part of the imputation that is never done by the recipe function: those NAs in the data set are going to be replaced with 0.

We find out that 1. observations in site 3 do not have crime report. 2. observations in site 7 do not have widowers.

### find the features that are in the site03 training set but not in its test set
train_colnames_site03 <- colnames(
  processed_features_gfactor_ses_list[["site03"]][["output_train_baseline"]]
)
test_colnames_site03 <- colnames(
  processed_features_gfactor_ses_list[["site03"]][["output_test_baseline"]]
)
site03_add <- setdiff(train_colnames_site03, test_colnames_site03)
site03_add
## [1] "quartic_uniform_crime_reports"
### check the original (unprocessed) site03 test values of the missing feature

gfactor_ses_baseline_test[["site03"]][[site03_add]]
##   [1]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [26]  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [51]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [76]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [101]  0  0  0  0  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0  0  0  0  0  0
## [126]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [151]  0  0  0  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0  0  0  0  0  0  0
## [176]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [201]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [226]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [251]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [276]  0  0  0  0  0  0  0  0 NA  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [301]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [326]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0
## [351]  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [376]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [401]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [426]  0  0  0 NA  0  0  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0  0  0 NA
## [451]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [476]  0  0  0  0  0  0  0  0  0  0 NA  0 NA  0  0  0  0  0  0  0  0  0  0 NA  0
## [501]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [526]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [551]  0  0 NA  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [576]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
### manually fix baseline and followup data
### (add the missing column back to the site03 test sets, filled with 0)

processed_features_gfactor_ses_list[["site03"]][["output_test_baseline"]] <-
  processed_features_gfactor_ses_list[["site03"]][["output_test_baseline"]] %>%
  mutate(quartic_uniform_crime_reports = 0)


processed_features_gfactor_ses_list[["site03"]][["output_test_followup"]] <-
  processed_features_gfactor_ses_list[["site03"]][["output_test_followup"]] %>%
  mutate(quartic_uniform_crime_reports = 0)

### do the same thing for site07, site20
### NOTE(review): only site07 is handled in the code that follows -- confirm
### whether site20 still needs the same fix.

### find the features that are in the site07 training set but not in its test set
train_colnames_site07 <- colnames(
  processed_features_gfactor_ses_list[["site07"]][["output_train_baseline"]]
)
test_colnames_site07 <- colnames(
  processed_features_gfactor_ses_list[["site07"]][["output_test_baseline"]]
)
site07_add <- setdiff(train_colnames_site07, test_colnames_site07)
site07_add
## [1] "marital_X2"
### check the original site07 test set for rows with marital code 2

which(gfactor_ses_baseline_test[["site07"]][["marital"]] == 2)
## integer(0)
### manually fix baseline and followup data
### (add the missing dummy column back to the site07 test sets, filled with 0)

processed_features_gfactor_ses_list[["site07"]][["output_test_baseline"]] <-
  processed_features_gfactor_ses_list[["site07"]][["output_test_baseline"]] %>%
  mutate(marital_X2 = 0)


processed_features_gfactor_ses_list[["site07"]][["output_test_followup"]] <-
  processed_features_gfactor_ses_list[["site07"]][["output_test_followup"]] %>%
  mutate(marital_X2 = 0)

extract the processed datasets

# Per-site lists of the four processed tables.
processed_ses_baseline_train <- purrr::map(processed_features_gfactor_ses_list, "output_train_baseline")
processed_ses_baseline_test  <- purrr::map(processed_features_gfactor_ses_list, "output_test_baseline")
processed_ses_followup_train <- purrr::map(processed_features_gfactor_ses_list, "output_train_followup")
processed_ses_followup_test  <- purrr::map(processed_features_gfactor_ses_list, "output_test_followup")

# The same tables with the subject-identifying columns removed.
processed_ses_baseline_train_select <- purrr::map(
  processed_ses_baseline_train,
  function(site_tbl) dplyr::select(site_tbl, -all_of(subj_info))
)
processed_ses_baseline_test_select <- purrr::map(
  processed_ses_baseline_test,
  function(site_tbl) dplyr::select(site_tbl, -all_of(subj_info))
)
processed_ses_followup_train_select <- purrr::map(
  processed_ses_followup_train,
  function(site_tbl) dplyr::select(site_tbl, -all_of(subj_info))
)
processed_ses_followup_test_select <- purrr::map(
  processed_ses_followup_test,
  function(site_tbl) dplyr::select(site_tbl, -all_of(subj_info))
)

4 model fitting

4.1 Baseline model fitting

### fit the PLS model (tuned with pls_tune)
### baseline

# Predictor names: every column of the first site's training table but gfactor.
dummy_features <- colnames(
  dplyr::select(processed_ses_baseline_train_select[[1]], -"gfactor")
)


# One recipe per site, built from that site's baseline training table.
ses_baseline_recipe_list <- purrr::map(
  processed_ses_baseline_train_select,
  function(train_tbl) recipe_prep(train_input = train_tbl, features_input = dummy_features)
)

# Tune per site and keep the finalised workflow.
ses_pls_fit_baseline <- purrr::map(
  ses_baseline_recipe_list,
  function(site_recipe) pls_tune(recipe_input = site_recipe, feature_input = dummy_features)
)

ses_pls_fit_baseline_wf <- purrr::map(ses_pls_fit_baseline, "pls_final_wf")


# Final fit per site, evaluated on the baseline test table.
ses_pls_model_fit_baseline <- purrr::pmap(
  list(
    ses_baseline_recipe_list,
    ses_pls_fit_baseline_wf,
    processed_ses_baseline_test_select
  ),
  function(site_recipe, site_wf, eval_tbl) {
    model_final_fit(recipe_input = site_recipe, wf_input = site_wf, test_data = eval_tbl)
  }
)


ses_pls_pred_baseline <- purrr::map(ses_pls_model_fit_baseline, "model_predict")

# Same final fit, evaluated on the baseline training table.
ses_pls_model_fit_baseline_train <- purrr::pmap(
  list(
    ses_baseline_recipe_list,
    ses_pls_fit_baseline_wf,
    processed_ses_baseline_train_select
  ),
  function(site_recipe, site_wf, eval_tbl) {
    model_final_fit(recipe_input = site_recipe, wf_input = site_wf, test_data = eval_tbl)
  }
)

ses_pls_pred_baseline_train <- purrr::map(ses_pls_model_fit_baseline_train, "model_predict")


# Per-site test metrics, stacked into one table.
ses_baseline_metric <- do.call(
  rbind,
  purrr::map2(
    ses_pls_pred_baseline,
    processed_ses_baseline_test,
    function(site_pred, site_tbl) metric_compute_site(data_input = site_pred, site_input = site_tbl)
  )
)
## Warning: Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.
## ℹ Please use `"gfactor"` instead of `.data$gfactor`
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.
## ℹ Please use `"model_predict"` instead of `.data$model_predict`
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
      # Render the per-site baseline metrics as a formatted table.
      ses_baseline_metric %>%
        kableExtra::kbl(caption = "metrics for all sites in baseline") %>%
        kableExtra::kable_classic(full_width = FALSE, html_font = "Cambria")
metrics for all sites in baseline
correlation tradrsq MAE RMSE site
0.5296415 0.2799987 0.6674212 0.8473496 site01
0.4190442 0.1738486 0.7130802 0.9080960 site02
0.4256129 0.1798805 0.7140928 0.9048470 site03
0.4605481 0.2073923 0.7073440 0.8896793 site04
0.5315242 0.2817477 0.6726602 0.8463295 site05
0.4110856 0.1633670 0.7213300 0.9138609 site06
0.6184235 0.3801687 0.6171497 0.7860778 site07
0.4615650 0.2100628 0.7000236 0.8874722 site08
0.4282846 0.1808457 0.7177252 0.9039192 site09
0.5028080 0.2527864 0.6742314 0.8638105 site10
0.5056590 0.2527789 0.6734357 0.8634277 site11
0.6789290 0.4495558 0.5814396 0.7412934 site12
0.4856282 0.2338633 0.6846422 0.8746792 site13
0.3714416 0.1290946 0.7449125 0.9324399 site14
0.5399691 0.2913159 0.6641652 0.8408262 site15
0.3593052 0.1130117 0.7340652 0.9413209 site16
0.4069269 0.1549192 0.7415909 0.9184467 site17
0.4333639 0.1773149 0.7089887 0.9058161 site18
0.5454390 0.2961545 0.6714145 0.8381401 site19
0.5431077 0.2949598 0.6648759 0.8390482 site20
0.5461596 0.2980361 0.6480221 0.8370681 site21
# Average the per-site baseline metrics across sites.
ses_baseline_metric_avg <- average_metric_one_mod(metric_list = ses_baseline_metric)

# Display columns of the averaged table, in presentation order.
avg_table_var_names <- c("correlation (sd)", "tradrsq (sd)", "MAE (sd)", "RMSE (sd)")


# Round every numeric column to 3 digits, paste each metric with its sd as
# "value (sd)", and keep only the character columns.
# across()/where() replace the superseded mutate_if()/select_if().
ses_baseline_metric_avg_table <- ses_baseline_metric_avg %>%
  mutate(across(where(is.numeric), ~ round(.x, digits = 3))) %>%
  mutate(
    "correlation (sd)" = paste0(correlation, " (", cor_sd, ")"),
    "tradrsq (sd)" = paste0(tradrsq, " (", rsq_sd, ")"),
    "MAE (sd)" = paste0(MAE, " (", mae_sd, ")"),
    "RMSE (sd)" = paste0(RMSE, " (", rmse_sd, ")")
  ) %>%
  dplyr::select(where(is.character))

  # Render the averaged metrics as a formatted table.
  ses_baseline_metric_avg_table %>%
    dplyr::select(all_of(avg_table_var_names)) %>%
    kableExtra::kbl(caption = paste0("metrics for modalities averaged across sites in baseline")) %>%
    kableExtra::kable_classic(full_width = FALSE,
                              html_font = "Cambria")
metrics for modalities averaged across sites in baseline
correlation (sd) tradrsq (sd) MAE (sd) RMSE (sd)
0.486 (0.08) 0.238 (0.083) 0.687 (0.041) 0.871 (0.049)

4.2 Followup model fitting

### fit the PLS model (tuned with pls_tune)
### followup
### Every purrr call is namespace-qualified, as in the baseline section, so
### that the calls do not depend on which attached package owns the name.

# One recipe per site, built from that site's followup training table.
# dummy_features is the predictor-name vector defined in the baseline section.
ses_followup_recipe_list <- purrr::map(.x = processed_ses_followup_train_select,
                             ~recipe_prep(train_input=.x, features_input = dummy_features))

# Tune per site and keep the finalised workflow.
ses_pls_fit_followup <- purrr::map(.x=ses_followup_recipe_list,
                              ~pls_tune(recipe_input = .x,feature_input =dummy_features ))

ses_pls_fit_followup_wf <- purrr::map(ses_pls_fit_followup,"pls_final_wf")


# Final fit per site, evaluated on the followup test table.
ses_pls_model_fit_followup <- purrr::pmap(list(ses_followup_recipe_list,
                                        ses_pls_fit_followup_wf,
                                        processed_ses_followup_test_select),~
                                                 model_final_fit(recipe_input = ..1,
                                    wf_input = ..2,
                                    test_data = ..3))


ses_pls_pred_followup <- purrr::map(ses_pls_model_fit_followup,"model_predict")

# Same final fit, evaluated on the followup training table.
ses_pls_model_fit_followup_train <- purrr::pmap(list(ses_followup_recipe_list,
                                        ses_pls_fit_followup_wf,
                                        processed_ses_followup_train_select),~
                                                 model_final_fit(recipe_input = ..1,
                                    wf_input = ..2,
                                    test_data = ..3))

ses_pls_pred_followup_train <- purrr::map(ses_pls_model_fit_followup_train,"model_predict")


# Per-site test metrics, stacked into one table.
ses_followup_metric <- purrr::map2(.x=ses_pls_pred_followup,
     .y=processed_ses_followup_test,~metric_compute_site(data_input =.x ,
                                           site_input = .y)) %>%
                      do.call(rbind,.)

      ses_followup_metric%>% 
    kableExtra::kbl(caption = "metrics for all sites in followup") %>%
    kableExtra::kable_classic(full_width = F, 
                             html_font = "Cambria")
metrics for all sites in followup
correlation tradrsq MAE RMSE site
0.5609364 0.3099483 0.6578762 0.8287038 site01
0.3933879 0.1465595 0.7423183 0.9226148 site02
0.4693260 0.2192417 0.7112744 0.8823974 site03
0.4848541 0.2324355 0.6873146 0.8753572 site04
0.4999528 0.2419532 0.7086454 0.8687557 site05
0.3927633 0.1489401 0.7207011 0.9213176 site06
0.5975769 0.3531784 0.6402928 0.8011529 site07
0.4031141 0.1550298 0.7219744 0.9168504 site08
0.3874066 0.1368465 0.7304625 0.9269844 site09
0.4869327 0.2267327 0.6859717 0.8784987 site10
0.4715110 0.2112905 0.7035412 0.8858013 site11
0.6377458 0.3774529 0.6222828 0.7876826 site12
0.4072029 0.1456935 0.7135677 0.9231990 site13
0.4178285 0.1636509 0.7234086 0.9134719 site14
0.6179106 0.3770292 0.6136548 0.7878166 site15
0.3582303 0.1101277 0.7489837 0.9425826 site16
0.2845979 0.0469678 0.7740788 0.9748955 site17
0.4256326 0.1699855 0.7334627 0.9090941 site18
0.5330800 0.2836585 0.6571195 0.8452189 site19
0.5625736 0.3097989 0.6645845 0.8300251 site20
0.4699542 0.2125067 0.6706179 0.8862580 site21
## summarise the followup metric table with average_metric_one_mod
ses_followup_metric_avg <- average_metric_one_mod(
  metric_list = ses_followup_metric
)

## round every numeric column to 3 digits, paste each metric together with
## its sd column as "metric (sd)", and keep only the character columns
ses_followup_metric_avg_table <- ses_followup_metric_avg %>%
  mutate(across(where(is.numeric), ~ round(.x, digits = 3))) %>%
  mutate(
    "correlation (sd)" = paste0(correlation, " (", cor_sd, ")"),
    "tradrsq (sd)" = paste0(tradrsq, " (", rsq_sd, ")"),
    "MAE (sd)" = paste0(MAE, " (", mae_sd, ")"),
    "RMSE (sd)" = paste0(RMSE, " (", rmse_sd, ")")
  ) %>%
  dplyr::select(where(is.character))

## render the four "metric (sd)" columns as an html table
ses_followup_metric_avg_table %>%
  dplyr::select(all_of(avg_table_var_names)) %>%
  kableExtra::kbl(
    caption = "metrics for modalities averaged across sites in followup"
  ) %>%
  kableExtra::kable_classic(full_width = FALSE, html_font = "Cambria")
metrics for modalities averaged across sites in followup
correlation (sd) tradrsq (sd) MAE (sd) RMSE (sd)
0.47 (0.091) 0.218 (0.09) 0.697 (0.043) 0.881 (0.052)

4.3 Plotting the trace of performance metric against the number of factors.

Baseline

## pull the tuning grid and the selected parameters out of each baseline fit
ses_pls_grid_baseline <- purrr::map(
  ses_pls_fit_baseline,
  function(site_fit) site_fit[["pls_grid"]]
)
ses_pls_param_baseline <- purrr::map(
  ses_pls_fit_baseline,
  function(site_fit) site_fit[["best_pls_model"]]
)

## Plot the tuning metrics against the number of PLS components.
##
## grid_input:  tuning results; passed to collect_metrics(), whose output must
##              contain the columns num_comp, mean and .metric.
## param_input: the selected parameters; only its num_comp element is read.
##
## Returns a ggplot with one facet row per metric and a vertical line at the
## selected number of components.
factor_metric_plot <- function(grid_input, param_input) {
  selected_comp <- param_input$num_comp

  grid_input %>%
    collect_metrics() %>%
    ggplot(aes(num_comp, mean, col = .metric)) +
    geom_point() +
    geom_line() +
    ## `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0)
    geom_vline(xintercept = selected_comp, linewidth = 1.5) +
    scale_x_continuous(n.breaks = 26) +
    labs(
      x = "Number of components",
      y = "Indicator",
      title = "Plot of RMSE vs number of components ",
      subtitle = paste0("Optimal number of components is ", selected_comp)
    ) +
    facet_grid(.metric ~ .) +
    theme_few() +
    ## the facets already name the metric, so the colour legend is hidden
    theme(legend.position = "none")
}


## one tuning plot per element of the baseline grid / parameter lists
comp_metric_plot_baseline <- purrr::map2(
  ses_pls_grid_baseline,
  ses_pls_param_baseline,
  function(site_grid, site_param) {
    factor_metric_plot(grid_input = site_grid, param_input = site_param)
  }
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## print the list of baseline tuning plots (one element per site)
comp_metric_plot_baseline
## $site01

## 
## $site02

## 
## $site03

## 
## $site04

## 
## $site05

## 
## $site06

## 
## $site07

## 
## $site08

## 
## $site09

## 
## $site10

## 
## $site11

## 
## $site12

## 
## $site13

## 
## $site14

## 
## $site15

## 
## $site16

## 
## $site17

## 
## $site18

## 
## $site19

## 
## $site20

## 
## $site21

Followup

## pull the tuning grid and the selected parameters out of each followup fit
ses_pls_grid_followup <- purrr::map(
  ses_pls_fit_followup,
  function(site_fit) site_fit[["pls_grid"]]
)
ses_pls_param_followup <- purrr::map(
  ses_pls_fit_followup,
  function(site_fit) site_fit[["best_pls_model"]]
)

## one tuning plot per element of the followup grid / parameter lists
comp_metric_plot_followup <- purrr::map2(
  ses_pls_grid_followup,
  ses_pls_param_followup,
  function(site_grid, site_param) {
    factor_metric_plot(grid_input = site_grid, param_input = site_param)
  }
)

comp_metric_plot_followup
## $site01

## 
## $site02

## 
## $site03

## 
## $site04

## 
## $site05

## 
## $site06

## 
## $site07

## 
## $site08

## 
## $site09

## 
## $site10

## 
## $site11

## 
## $site12

## 
## $site13

## 
## $site14

## 
## $site15

## 
## $site16

## 
## $site17

## 
## $site18

## 
## $site19

## 
## $site20

## 
## $site21

5 Feature importance for the whole data set

Loading tables for plotting

## lookup table used to label the features in the plots
plotting_names <- read.csv(
  paste0(scriptfold, "Common_psy_gene_brain_all/NonBrainFeaturesRead.csv")
)

## clean plotting names: rows 72 to 76 of the first column are overwritten
## with marital_X2 ... marital_X6
plotting_names[72:76, 1] <- paste0("marital_X", 2:6)

colnames(plotting_names) <- c("feature_names", "plotting_name")

5.1 Baseline

5.1.1 Variable importance plot for pls models at baseline

Model across all sites.

### stack the train and test folds of the first list element into one data set
data_all_site_baseline <- rbind(
  processed_ses_baseline_train_select[[1]],
  processed_ses_baseline_test_select[[1]]
)

## retune the model
all_data_recipe_baseline <- recipe_prep_scale(
  train_input = data_all_site_baseline,
  features_input = dummy_features
)

all_data_fit_baseline <- pls_tune(
  recipe_input = all_data_recipe_baseline,
  feature_input = dummy_features
)

all_data_wf_baseline <- all_data_fit_baseline[["pls_final_wf"]]

## final fit the model: take the parsnip spec out of the workflow and fit it
## on the stacked data
all_data_final_fit_baseline <- parsnip::fit(
  parsnip::extract_spec_parsnip(all_data_wf_baseline),
  data = data_all_site_baseline,
  formula = as.formula("gfactor~.")
)

### extract the number of components
all_data_param_baseline <- all_data_fit_baseline[["best_pls_model"]][["num_comp"]]

### extract the variance explained by each component
var_explained <- all_data_final_fit_baseline[["fit"]][["prop_expl_var"]][["X"]]

### plotting feature importance based on the model with all the data
comp_idx_vec <- 1:all_data_param_baseline

## get the coefficients; `term` is renamed so it can be joined to plotting_names
tidy_all_data_final_fit_baseline <- rename(
  tidy(all_data_final_fit_baseline),
  feature_names = term
)

tidy_all_data_final_fit_baseline_with_name <- drop_na(
  filter(
    full_join(
      tidy_all_data_final_fit_baseline,
      plotting_names,
      by = "feature_names"
    ),
    feature_names != "Y" # outcome variable col name
  )
)

## one two-column table (plotting_name, value) per component
tidy_all_data_final_fit_baseline_list <- purrr::map(
  comp_idx_vec,
  function(comp_id) {
    dplyr::select(
      filter(tidy_all_data_final_fit_baseline_with_name, component == comp_id),
      all_of(c("plotting_name", "value"))
    )
  }
)

### get the variable order from the first component:
### plotting names sorted by increasing value
tidy_all_data_final_fit_baseline_reordered <- arrange(
  tidy_all_data_final_fit_baseline_list[[1]],
  value
)$plotting_name

## Horizontal bar plot of the values of one PLS component, with the feature
## labels shown on the y axis.
##
## data_input:   data frame with the columns plotting_name and value.
## var_input:    number shown in the title as a percentage (round(x, 3) * 100).
## idx_input:    component index shown in the title.
## reorder_name: character vector giving the order of the plotting_name
##               levels; names that are not listed become NA.
##
## Returns a ggplot object.
pls_vi_plot_with_label <- function(data_input=tidy_all_data_final_fit_baseline_list[[1]],
                                   var_input = var_explained[1],
                                   idx_input = comp_idx_vec[1],
                                   reorder_name = tidy_all_data_final_fit_baseline_reordered){
  ## fix the bar order through the factor levels
  data_input <- data_input %>%
    mutate(plotting_name = factor(plotting_name, levels = reorder_name))

  ## pad the observed range by 0.05 on both sides. 0 is always kept inside the
  ## limits: the bars start at 0, and ggplot2 drops bars that reach outside the
  ## scale limits (this would happen when all values share the same sign)
  range_value <- range(data_input$value)
  x_lower <- min(round(range_value[1], 2) - 0.05, 0)
  x_upper <- max(round(range_value[2], 2) + 0.05, 0)

  var_title_medium <- paste0("comp ", idx_input," \n ",round(var_input,3)*100,"%")

  ggplot(data_input, aes(x = .data[["value"]], y = plotting_name)) +
    geom_bar(stat = "identity") +
    theme_classic() +
    scale_x_continuous(limits = c(x_lower, x_upper),
                       ## unique() avoids a duplicated break when a limit is 0
                       breaks = unique(c(x_lower, 0, x_upper))) +
    labs(title = var_title_medium) +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_text(size = 20, angle = 60, vjust = 0.5),
      axis.title.y = element_blank(),
      axis.text.y = element_text(size = 20),
      legend.text = element_blank(),
      plot.title = element_text(size = 20))
}

## first component, drawn with the feature labels on the y axis
comp_one_plot <- pls_vi_plot_with_label(
  data_input = tidy_all_data_final_fit_baseline_list[[1]],
  var_input = var_explained[1],
  idx_input = comp_idx_vec[1]
)



## Horizontal bar plot of the values of one PLS component, without y-axis
## labels or axis ticks, so it can be placed next to a labelled plot.
##
## data_input:   data frame with the columns plotting_name and value.
## var_input:    number shown in the title as a percentage (round(x, 3) * 100).
## idx_input:    component index shown in the title.
## reorder_name: character vector giving the order of the plotting_name
##               levels; names that are not listed become NA.
##
## Returns a ggplot object.
pls_vi_plot_no_label <- function(data_input=tidy_all_data_final_fit_baseline_list[[2]],
                                   var_input = var_explained[2],
                                   idx_input = comp_idx_vec[2],
                                 reorder_name = tidy_all_data_final_fit_baseline_reordered){
  ## fix the bar order through the factor levels
  data_input <- data_input %>%
    mutate(plotting_name = factor(plotting_name, levels = reorder_name))

  ## pad the observed range by 0.05 on both sides. 0 is always kept inside the
  ## limits: the bars start at 0, and ggplot2 drops bars that reach outside the
  ## scale limits (this would happen when all values share the same sign)
  range_value <- range(data_input$value)
  x_lower <- min(round(range_value[1], 2) - 0.05, 0)
  x_upper <- max(round(range_value[2], 2) + 0.05, 0)

  var_title_medium <- paste0("comp ", idx_input," \n ",round(var_input,3)*100,"%")

  ggplot(data_input, aes(x = .data[["value"]], y = plotting_name)) +
    geom_bar(stat = "identity") +
    scale_x_continuous(limits = c(x_lower, x_upper),
                       ## unique() avoids a duplicated break when a limit is 0
                       breaks = unique(c(x_lower, 0, x_upper))) +
    theme_classic() +
    labs(title = var_title_medium) +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_text(size = 20, angle = 60, vjust = 0.5),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      legend.text = element_blank(),
      plot.title = element_text(size = 20),
      axis.ticks = element_blank())
}


## components 2 .. num_comp, drawn without y-axis labels; the list names are
## matched to the arguments of pls_vi_plot_no_label
comp_other_plots <- purrr::pmap(
  list(
    data_input = tidy_all_data_final_fit_baseline_list[2:all_data_param_baseline],
    var_input = var_explained[2:all_data_param_baseline],
    idx_input = comp_idx_vec[2:all_data_param_baseline]
  ),
  pls_vi_plot_no_label
)

## put the unlabelled plots in a single row
comp_other_plots_combined <- ggpubr::ggarrange(
  plotlist = comp_other_plots,
  nrow = 1,
  ncol = length(var_explained) - 1
)

## labelled first component on the left, remaining components on the right
comp_plots_all <- gridExtra::grid.arrange(
  comp_one_plot,
  comp_other_plots_combined,
  nrow = 1,
  ncol = 2,
  widths = c(4.5, 4)
)

comp_plots_all
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]

5.1.2 Univariate correlations for baseline data

## stack the train and test folds of the first list element
corr_baseline_train <- processed_ses_baseline_train_select[[1]]
corr_baseline_test <- processed_ses_baseline_test_select[[1]]

corr_data_all <- rbind(corr_baseline_train, corr_baseline_test)

## every column except the outcome is treated as a feature
corr_features <- colnames(dplyr::select(corr_data_all, -"gfactor"))

## correlation of each feature with gfactor, as a plain numeric vector
corr_all_features <- as.numeric(
  do.call(
    rbind,
    purrr::map(
      corr_features,
      function(feature) {
        cor(corr_data_all[[feature]], corr_data_all[["gfactor"]])
      }
    )
  )
)

## the matching Pearson tests, kept for their confidence intervals
corr_all_features_cor_test <- purrr::map(
  corr_features,
  function(feature) {
    cor.test(
      corr_data_all[[feature]],
      corr_data_all[["gfactor"]],
      method = "pearson"
    )
  }
)

## one row per feature: lower and upper confidence limits
corr_all_features_ci <- rename(
  tibble::as_tibble(
    do.call(rbind, purrr::map(corr_all_features_cor_test, "conf.int"))
  ),
  low = V1,
  upp = V2
)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
  ## correlation and confidence limits of every feature, side by side
  corr_output_tibble <- cbind(
    tibble(feature_names = corr_features, value = corr_all_features),
    corr_all_features_ci
  )

  ## attach the plotting names; rows with any missing value are dropped
  corr_baseline_all_sites_names <- drop_na(
    full_join(corr_output_tibble, plotting_names, by = "feature_names")
  )

  ## bar plot of the correlations with error bars from low to upp
  corr_baseline_all_sites_names %>%
    mutate(plotting_name = fct_reorder(plotting_name, value, .fun = "max")) %>%
    ggplot(aes(x = plotting_name, y = value)) +
    geom_bar(stat = "identity", fill = "gray30", alpha = 0.7) +
    geom_errorbar(
      aes(x = plotting_name, ymin = low, ymax = upp),
      width = 0.4,
      colour = "black",
      alpha = 0.9,
      linewidth = 1.3
    ) +
    coord_flip() +
    theme_classic() +
    labs(y = "Correlation ", x = "") +
    theme(
      axis.title.x = element_text(size = 20),
      axis.title.y = element_text(size = 20),
      axis.text.y = element_text(size = 20),
      axis.text.x = element_text(size = 20)
    )

## same correlations, with the features ordered as in the PLS component plots
corr_baseline_all_sites_names_for_all <- mutate(
  corr_baseline_all_sites_names,
  plotting_name = factor(
    plotting_name,
    levels = tidy_all_data_final_fit_baseline_reordered
  )
)

## unlabelled correlation bar plot that sits next to the component plots
## (the fct_reorder ordering of the stand-alone plot is not used here)
corr_bar_plot_baseline <- ggplot(
  corr_baseline_all_sites_names_for_all,
  aes(x = plotting_name, y = value)
) +
  geom_bar(stat = "identity", fill = "gray40", alpha = 0.7) +
  geom_errorbar(
    aes(x = plotting_name, ymin = low, ymax = upp),
    width = 0.4,
    colour = "black",
    alpha = 0.9,
    linewidth = 1.3
  ) +
  scale_y_continuous(
    limits = c(-0.35, 0.45),
    breaks = c(-0.35, 0, 0.45)
  ) +
  coord_flip() +
  theme_classic() +
  labs(title = "Univariate \ncorrelations") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 20, angle = 60, vjust = 0.5),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    legend.text = element_blank(),
    plot.title = element_text(size = 20),
    axis.ticks = element_blank()
  )

5.1.3 Join the PLS variable importance plots with the univariate correlation plot

## component plots on the left, univariate correlations on the right
vi_pls_plot_baseline_all <- gridExtra::grid.arrange(
  comp_plots_all,
  corr_bar_plot_baseline,
  nrow = 1,
  ncol = 2,
  widths = c(7, 1)
)

vi_pls_plot_baseline_all
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name            grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (1-1,2-2) arrange  gtable[layout]

5.2 Followup

5.2.1 Variable importance plot for pls models at followup

Model across all sites.

### stack the train and test folds of the first list element into one data set
data_all_site_followup <- rbind(processed_ses_followup_train_select[[1]],
                                processed_ses_followup_test_select[[1]])

## retune the model
all_data_recipe_followup <- recipe_prep_scale(train_input = data_all_site_followup,
                                              features_input = dummy_features)

## note kept from the original script: pls_tune is meant to pick a more
## parsimonious model, cutting at the number of components beyond which the
## RMSE is no longer reduced by 0.1% (verify against the pls_tune definition)
all_data_fit_followup <- pls_tune(recipe_input = all_data_recipe_followup,
                                  feature_input = dummy_features)

all_data_wf_followup <- all_data_fit_followup[["pls_final_wf"]]

## final fit the model: take the parsnip spec out of the workflow and fit it
## on the stacked data
all_data_final_fit_followup <- all_data_wf_followup %>%
  parsnip::extract_spec_parsnip() %>%
  parsnip::fit(data = data_all_site_followup, formula = as.formula("gfactor~."))

## get the coefficients
tidy_all_data_final_fit_followup <- all_data_final_fit_followup %>%
  tidy()

### extract the number of components
all_data_param_followup <- all_data_fit_followup[["best_pls_model"]][["num_comp"]]

### extract the variance explained by each component
var_explained_followup <- all_data_final_fit_followup[["fit"]][["prop_expl_var"]][["X"]]

### plotting feature importance based on the model with all the data
comp_idx_vec_followup <- c(1:all_data_param_followup)

## `term` is renamed so the coefficients can be joined to plotting_names
tidy_all_data_final_fit_followup <- tidy_all_data_final_fit_followup %>%
  rename(feature_names = term)

tidy_all_data_final_fit_followup_with_name <- full_join(tidy_all_data_final_fit_followup,
                                                        plotting_names, by = "feature_names") %>%
  filter(feature_names != "Y") %>% # outcome variable col name
  ## drop the rows the full join could not match, exactly as the baseline
  ## section does; otherwise terms without a plotting name reach the plots
  ## as NA levels
  drop_na()

## one two-column table (plotting_name, value) per component
tidy_all_data_final_fit_followup_list <- purrr::map(comp_idx_vec_followup,
                                                    ~filter(tidy_all_data_final_fit_followup_with_name,
                                                            component == .) %>%
                                                      dplyr::select(all_of(c("plotting_name", "value"))))

### get the variable order from the first component:
### plotting names sorted by increasing value
tidy_all_data_final_fit_followup_reordered <- tidy_all_data_final_fit_followup_list[[1]] %>%
  arrange(value)

tidy_all_data_final_fit_followup_reordered <- tidy_all_data_final_fit_followup_reordered$plotting_name


## first followup component, drawn with the feature labels on the y axis
comp_one_plot_followup <- pls_vi_plot_with_label(
  data_input = tidy_all_data_final_fit_followup_list[[1]],
  var_input = var_explained_followup[1],
  idx_input = comp_idx_vec_followup[1],
  reorder_name = tidy_all_data_final_fit_followup_reordered
)

## components 2 .. num_comp, drawn without labels in the followup order
comp_other_plots_followup <- purrr::pmap(
  list(
    tidy_all_data_final_fit_followup_list[2:all_data_param_followup],
    var_explained_followup[2:all_data_param_followup],
    comp_idx_vec_followup[2:all_data_param_followup]
  ),
  function(comp_data, comp_var, comp_idx) {
    pls_vi_plot_no_label(
      data_input = comp_data,
      var_input = comp_var,
      idx_input = comp_idx,
      reorder_name = tidy_all_data_final_fit_followup_reordered
    )
  }
)

## put the unlabelled plots in a single row
comp_other_plots_combined_followup <- ggpubr::ggarrange(
  plotlist = comp_other_plots_followup,
  nrow = 1,
  ncol = length(var_explained_followup) - 1
)

## labelled first component on the left, remaining components on the right
comp_plots_all_followup <- gridExtra::grid.arrange(
  comp_one_plot_followup,
  comp_other_plots_combined_followup,
  nrow = 1,
  ncol = 2,
  widths = c(4.5, 4)
)

comp_plots_all_followup
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]

5.2.2 plotting univariate correlation plots across sites at followup

The bar plot for correlations across all sites. Train and test fold are joined together.

## stack the train and test folds of the first list element
corr_data_all_followup <- rbind(
  processed_ses_followup_train[[1]],
  processed_ses_followup_test[[1]]
)

## correlation of each feature in dummy_features with gfactor
corr_all_features_followup <- as.numeric(
  do.call(
    rbind,
    purrr::map(
      dummy_features,
      function(feature) {
        cor(
          corr_data_all_followup[[feature]],
          corr_data_all_followup[["gfactor"]]
        )
      }
    )
  )
)

## the matching Pearson tests, kept for their confidence intervals
corr_all_features_cor_test_followup <- purrr::map(
  dummy_features,
  function(feature) {
    cor.test(
      corr_data_all_followup[[feature]],
      corr_data_all_followup[["gfactor"]],
      method = "pearson"
    )
  }
)

## one row per feature: lower and upper confidence limits
corr_all_features_ci_followup <- rename(
  tibble::as_tibble(
    do.call(rbind, purrr::map(corr_all_features_cor_test_followup, "conf.int"))
  ),
  low = V1,
  upp = V2
)

## correlation and confidence limits of every feature, side by side
corr_output_tibble_followup <- cbind(
  tibble(feature_names = dummy_features, value = corr_all_features_followup),
  corr_all_features_ci_followup
)

## attach the plotting names; rows with any missing value are dropped
corr_followup_all_sites_names <- drop_na(
  full_join(corr_output_tibble_followup, plotting_names, by = "feature_names")
)

## bar plot of the correlations with error bars from low to upp
corr_followup_all_sites_names %>%
  mutate(plotting_name = fct_reorder(plotting_name, value, .fun = "max")) %>%
  ggplot(aes(x = plotting_name, y = value)) +
  geom_bar(stat = "identity", fill = "gray30", alpha = 0.7) +
  geom_errorbar(
    aes(x = plotting_name, ymin = low, ymax = upp),
    width = 0.4,
    colour = "black",
    alpha = 0.9,
    linewidth = 1.3
  ) +
  coord_flip() +
  theme_classic() +
  labs(y = "Correlation ", x = "") +
  theme(
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.y = element_text(size = 15),
    axis.text.x = element_text(size = 20)
  )

## same correlations, with the features ordered as in the followup PLS plots
corr_followup_all_sites_names_for_all <- mutate(
  corr_followup_all_sites_names,
  plotting_name = factor(
    plotting_name,
    levels = tidy_all_data_final_fit_followup_reordered
  )
)

## unlabelled correlation bar plot that sits next to the component plots
## (the fct_reorder ordering of the stand-alone plot is not used here)
corr_bar_plot_followup <- ggplot(
  corr_followup_all_sites_names_for_all,
  aes(x = plotting_name, y = value)
) +
  geom_bar(stat = "identity", fill = "gray40", alpha = 0.7) +
  geom_errorbar(
    aes(x = plotting_name, ymin = low, ymax = upp),
    width = 0.4,
    colour = "black",
    alpha = 0.9,
    linewidth = 1.3
  ) +
  ## use this because all the plots should have the same height
  scale_y_continuous(
    limits = c(-0.35, 0.45),
    breaks = c(-0.35, 0, 0.45)
  ) +
  coord_flip() +
  theme_classic() +
  labs(title = "Univariate \ncorrelations") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 20, angle = 60, vjust = 0.5),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    legend.text = element_blank(),
    plot.title = element_text(size = 20),
    axis.ticks = element_blank()
  )

5.3 Join the PLS variable importance plots with the univariate correlation plot (followup)

## component plots on the left, univariate correlations on the right
vi_pls_plot_followup_all <- gridExtra::grid.arrange(
  comp_plots_all_followup,
  corr_bar_plot_followup,
  nrow = 1,
  ncol = 2,
  widths = c(7, 1)
)

vi_pls_plot_followup_all
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name            grob
## 1 1 (1-1,1-1) arrange gtable[arrange]
## 2 2 (1-1,2-2) arrange  gtable[layout]

Combine baseline and followup together

## add a bold "Baseline" / "Followup" heading on top of each combined figure
vi_pls_plot_baseline_label <- ggpubr::annotate_figure(
  vi_pls_plot_baseline_all,
  top = ggpubr::text_grob("Baseline", size = 20, face = "bold", hjust = 3.2)
)

vi_pls_plot_followup_label <- ggpubr::annotate_figure(
  vi_pls_plot_followup_all,
  top = ggpubr::text_grob("Followup", size = 20, face = "bold", hjust = 3)
)

## stack the two figures: baseline on top, followup below
vi_pls_plot_label <- ggpubr::ggarrange(
  vi_pls_plot_baseline_label,
  vi_pls_plot_followup_label,
  nrow = 2
)

## overall figure title
title_vi_pls_plot <- ggpubr::annotate_figure(
  vi_pls_plot_label,
  top = ggpubr::text_grob(
    "Feature importance of Partial Least Squares Regressions Predicting Cognitive \nAbilities from Social-Demographics, Lifestyles and Developments Variables",
    size = 25,
    face = "bold"
  )
)

title_vi_pls_plot

Re-plot the followup results with the features in the baseline order

### get the variable order from the first component:
comp_plots_baseline_followup <- purrr::pmap(list(tidy_all_data_final_fit_followup_list[1:all_data_param_followup],
                                     var_explained_followup[1:all_data_param_followup],
                                     comp_idx_vec_followup[1:all_data_param_followup]
                                     ),~pls_vi_plot_no_label(data_input=..1,
                                   var_input = ..2,
                                   idx_input = ..3,
                                   reorder_name=tidy_all_data_final_fit_baseline_reordered)) 
comp_plots_combined_baseline_followup <- ggpubr::ggarrange(plotlist=comp_plots_baseline_followup,
                                                        nrow =1,ncol = length(var_explained_followup))

comp_plots_combined_baseline_followup

## followup correlations, with the features in the baseline order
corr_followup_baseline_all_sites_names_for_all <- mutate(
  corr_followup_all_sites_names,
  plotting_name = factor(
    plotting_name,
    levels = tidy_all_data_final_fit_baseline_reordered
  )
)

## unlabelled correlation bar plot that sits next to the component plots
## (the fct_reorder ordering of the stand-alone plot is not used here)
corr_bar_plot_followup_baseline <- ggplot(
  corr_followup_baseline_all_sites_names_for_all,
  aes(x = plotting_name, y = value)
) +
  geom_bar(stat = "identity", fill = "gray40", alpha = 0.7) +
  geom_errorbar(
    aes(x = plotting_name, ymin = low, ymax = upp),
    width = 0.4,
    colour = "black",
    alpha = 0.9,
    linewidth = 1.3
  ) +
  ## use this because all the plots should have the same height
  scale_y_continuous(
    limits = c(-0.35, 0.45),
    breaks = c(-0.35, 0, 0.45)
  ) +
  coord_flip() +
  theme_classic() +
  labs(title = "Univariate \ncorrelations") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 12, angle = 60, vjust = 0.5),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    legend.text = element_blank(),
    plot.title = element_text(size = 15),
    axis.ticks = element_blank()
  )

5.4 Join the PLS variable importance plots with the univariate correlation plot (followup, baseline feature order)

## followup component plots on the left, followup correlations on the right
vi_pls_plot_followup_baseline_all <- gridExtra::grid.arrange(
  comp_plots_combined_baseline_followup,
  corr_bar_plot_followup_baseline,
  nrow = 1,
  ncol = 2,
  widths = c(8, 1.4)
)

vi_pls_plot_followup_baseline_all
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## add a "Baseline" / "Followup" heading on top of each combined figure
vi_pls_plot_baseline_no_label <- ggpubr::annotate_figure(
  vi_pls_plot_baseline_all,
  top = ggpubr::text_grob("Baseline", size = 20, hjust = 2.5)
)

vi_pls_plot_followup_no_label <- ggpubr::annotate_figure(
  vi_pls_plot_followup_baseline_all,
  top = ggpubr::text_grob("Followup", size = 20, hjust = 2.5)
)

## place the two figures side by side; this overwrites the stacked
## vi_pls_plot_label / title_vi_pls_plot objects created earlier
vi_pls_plot_label <- ggpubr::ggarrange(
  vi_pls_plot_baseline_no_label,
  vi_pls_plot_followup_no_label,
  ncol = 2,
  widths = c(1.5, 1)
)

## overall figure title
title_vi_pls_plot <- ggpubr::annotate_figure(
  vi_pls_plot_label,
  top = ggpubr::text_grob(
    "Feature importance of Partial Least Squares Regressions \nPredicting Cognitive Abilities from Social-Demographics, \nLifestyles and Developments Variables",
    size = 25,
    face = "bold"
  )
)

title_vi_pls_plot

5.4.1 save the output

## collect the train / test predictions and data of both events in one list
output_list <- list(
  baseline_train_pred = ses_pls_pred_baseline_train,
  baseline_test_pred = ses_pls_pred_baseline,
  baseline_train_data = processed_ses_baseline_train,
  baseline_test_data = processed_ses_baseline_test,
  followup_train_pred = ses_pls_pred_followup_train,
  followup_test_pred = ses_pls_pred_followup,
  followup_train_data = processed_ses_followup_train,
  followup_test_data = processed_ses_followup_test
)

## written with saveRDS despite the .RData extension, so read it back
## with readRDS()
saveRDS(
  output_list,
  file = paste0(
    scriptfold,
    'genetics_psychopathology_common_scan_all_scripts/ses_pls_pred',
    '.RData'
  )
)

save the metrics

## tag each summary table with its event before stacking them
ses_baseline_metric_outout_table <- mutate(
  ses_baseline_metric_avg_table,
  event = "baseline"
)

ses_followup_metric_output_table <- mutate(
  ses_followup_metric_avg_table,
  event = "followup"
)

## stack both events and label the modality
output_table <- mutate(
  bind_rows(
    ses_baseline_metric_outout_table,
    ses_followup_metric_output_table
  ),
  modality = "Social Demo Lifestyle Dev"
)

## written with saveRDS despite the .RData extension, so read it back
## with readRDS()
saveRDS(
  output_table,
  file = paste0(
    scriptfold,
    'Common_psy_gene_brain_all/saved_outputs/performance_metrics/ses_performance_metric',
    '.RData'
  )
)